Bank customers survival or not classification

Bank customers survival or not classification | churn modeling

 # ANN using Stochastic Gradient Descent

# %%  Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# %% Importing the dataset

# Load the raw table from the CSV file in the working directory.
dataset = pd.read_csv('Churn_Modelling.csv')
# Feature matrix: positional columns 3 through 12 (the slice end is exclusive).
x = dataset.iloc[:, 3:13].values
# Target vector: positional column 13.
y = dataset.iloc[:, 13].values

# %% Encoding categorical data

from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# One LabelEncoder instance is refitted on each column in turn, so once the
# loop finishes it only holds the classes of the last column it was fitted on.
# Columns 1 and 2 of x are presumably the categorical ones - verify against
# the CSV header.
le = LabelEncoder()

for column_index in (1, 2):
    x[:, column_index] = le.fit_transform(x[:, column_index])

# %%  Creating Dataframe

# Tabular view of the label-encoded features, using the original headers of
# the same ten source columns.
df = pd.DataFrame(data=x, columns=dataset.columns[3:13])


# %%  ColumnTransformer , OneHotEncoding

from sklearn.compose import ColumnTransformer

# Each transformer entry is (name, transformer instance, target columns):
# column 1 is one-hot encoded, and remainder='passthrough' forwards every
# other column unchanged.
transformer = ColumnTransformer(
    remainder='passthrough',
    transformers=[("OneHot", OneHotEncoder(), [1])],
)
# The array is handed over as plain Python lists and the transformed result
# is cast to float64 in the same step.
x = transformer.fit_transform(x.tolist()).astype('float64')

# %%

# one_hot_encoder = OneHotEncoder(categorical_features=[1])
# x = one_hot_encoder.fit_transform(x).toarray()

# Discard column 0 of the transformed matrix. Presumably this is the first
# one-hot indicator, removed so the remaining indicators are not redundant -
# confirm the output column order.
x = x[:, 1:]

# %%  Splitting the dataset into the Training set and Test set

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; the fixed random_state makes the
# split reproducible from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size = 0.2, random_state = 0
)


# %%  Feature Scaling

from sklearn.preprocessing import StandardScaler
# The scaling statistics are learned from the training rows only; the same
# fitted scaler is then applied to both partitions.
sc = StandardScaler()
sc.fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

# %%  Importing the keras library and packages

import keras
from keras.models import Sequential # to initialize the neural network
from keras.layers import Dense #to create (hidden) layers

# %%  Initializing the ANN

# Layer stack, in order:
#   1. first hidden layer  - 6 ReLU (rectifier) units; input_dim=11 declares
#                            the width of the input
#   2. second hidden layer - 6 ReLU units
#   3. output layer        - 1 sigmoid unit
# Every layer uses the 'uniform' kernel initializer for its starting weights.
#
# For a target with, for example, 3 categories the output layer would use
# units=3 together with the 'softmax' activation instead.
classifier = Sequential([
    Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=11),
    Dense(units=6, kernel_initializer='uniform', activation='relu'),
    Dense(units=1, kernel_initializer='uniform', activation='sigmoid'),
])

# %%  Compiling the ANN
# optimizer: 'adam', a variant of stochastic gradient descent.
# loss:      the quantity the optimizer minimizes while adjusting the weights
#            (the role MSE plays in linear regression). As in logistic
#            regression it is the logarithmic loss: 'binary_crossentropy' for
#            a two-category target, 'categorical_crossentropy' for more.
# metrics:   a list of metrics reported during training; only accuracy here.
classifier.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer='adam',
)

# %%  Fitting the ANN to the Training set
classifier.fit(x_train, y_train, epochs=100, batch_size=10)

# %%  Predicting the Test set results
# predict() yields one sigmoid output per test row; thresholding at 0.5 turns
# each of them into a boolean class decision.
y_pred = classifier.predict(x_test)
y_pred = (y_pred > 0.5)

# %%  Human-readable labels for the predictions

# The text labels are kept in their own array. y_pred has to stay boolean:
# overwriting it with strings makes it incomparable with the numeric y_test
# when the confusion matrix is built, and the True/False counts further down
# would never match anything.
# NOTE(review): 'Survived' is attached to the positive prediction exactly as
# before. If the target column flags customers who left the bank, the two
# labels are swapped - confirm against the dataset.
cl = ['Survived' if flag else 'Not Survived' for flag in y_pred.ravel()]
y_pred_labels = np.array(cl).reshape(-1, 1)


# %%  Creating confusion matrix for the model
from sklearn.metrics import confusion_matrix

# Both inputs are flattened to 1-D vectors of the same kind (integers).
# labels=[0, 1] pins the result to a 2x2 matrix even if one class never
# appears in the test split or in the predictions.
# Assumes y_test holds 0/1 class ids - TODO confirm.
cm = confusion_matrix(
    y_test.ravel(),
    y_pred.ravel().astype(int),
    labels=[0, 1],
)

# %% How many test rows were predicted True and how many False

true = [flag for flag in y_pred.ravel() if flag]
false = [flag for flag in y_pred.ravel() if not flag]

print('Prediction true: ', len(true))
print('Prediction false: ', len(false))



# %%  Confision Matrix terms

# scikit-learn lays the matrix out as cm[actual, predicted] with the labels in
# sorted order. For a 0/1 target with class 1 as the positive class:
#   cm[0, 0] = true negatives     cm[0, 1] = false positives
#   cm[1, 0] = false negatives    cm[1, 1] = true positives
# Assumes cm is the 2x2 matrix of a binary 0/1 problem - TODO confirm.
True_Negative = cm[0, 0]
False_Positive = cm[0, 1]
False_Negative = cm[1, 0]
True_Positive = cm[1, 1]

# Row totals (what the data actually is) and column totals (what was predicted).
Actual_Positive = True_Positive + False_Negative
Actual_Negative = True_Negative + False_Positive
Predicted_Positive = True_Positive + False_Positive
Predicted_Negative = True_Negative + False_Negative

Total_Population = True_Positive + True_Negative + False_Positive + False_Negative

# All rates below are percentages. The bare names after each assignment only
# display the value when the cell is run interactively.

# %%  Accuracy, Precision, Sensitivity, Specificity

# Share of all rows that were classified correctly.
Accuracy = (True_Positive + True_Negative)*100 / Total_Population
Accuracy
# %%  Precision (Reduce Type 1 error)

# Share of positive predictions that were correct.
Precision = True_Positive*100 / Predicted_Positive
Precision
# %%  Negative predictive value

# Share of negative predictions that were correct.
Negative_Rate = True_Negative*100 / Predicted_Negative
Negative_Rate
# %%  Sensitivity or Recall should be high (Reduce Type 2 error)

# Share of actual positives that were found.
Recall = True_Positive*100 / Actual_Positive
Recall

# %%  Miss Rate should be low

# Share of actual positives that were missed (100 - Recall).
Miss_Rate = False_Negative*100 / Actual_Positive
Miss_Rate


# %%  F1 Score

# Harmonic mean of Precision and Recall.
F1 = 2* Precision * Recall / (Precision + Recall)
F1

# %%

Welcome To Easy Python & basic Machine Learning codes

Search This Blog

Bank customers survival or not classification | churn modeling

Comments

Post a Comment

Popular posts from this blog

Classification & Confusion Matrix & Accuracy Paradox

Python program to check if variable is of integer or string

Multiple classification from many of directories