In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 

In [None]:
# Read Churn_Modelling.csv from the working directory and preview five random rows.
df = pd.read_csv('Churn_Modelling.csv')
df.sample(5)

In [None]:
# Drop the Surname and RowNumber columns.
# Reassigning (rather than inplace=True) with errors='ignore' keeps the cell
# idempotent: running it a second time is a no-op instead of a KeyError.
df = df.drop(columns=['Surname', 'RowNumber'], errors='ignore')
df.sample(5)

In [None]:
# Column dtypes; object-dtype columns are the ones inspected by get_unique_val below.
df.dtypes 

In [None]:
# CreditScore distribution, split by whether the customer exited.
cs_exited_n = df[df.Exited==0].CreditScore
cs_exited_y = df[df.Exited==1].CreditScore

# The x-axis label must name the plotted variable (CreditScore).
plt.xlabel('CreditScore')
plt.ylabel('No of Customers')
plt.title('Customer Exited Prediction Visualization')

plt.hist([cs_exited_y,cs_exited_n], color=['red', 'green'], label=['Exited=Yes', 'Exited=No'])
plt.legend()

In [None]:
# Balance distribution, split by whether the customer exited.
bal_exited_n = df[df.Exited==0].Balance
bal_exited_y = df[df.Exited==1].Balance

# The x-axis label must name the plotted variable (Balance).
plt.xlabel('Balance')
plt.ylabel('No of Customers')
plt.title('Customer Exited Prediction Visualization')

plt.hist([bal_exited_y,bal_exited_n], color=['red', 'green'], label=['Exited=Yes', 'Exited=No'])
plt.legend()

In [None]:
# Tenure distribution, split by whether the customer exited.
t_exited_n = df.loc[df['Exited'] == 0, 'Tenure']
t_exited_y = df.loc[df['Exited'] == 1, 'Tenure']

plt.hist(
    [t_exited_y, t_exited_n],
    color=['red', 'green'],
    label=['Exited=Yes', 'Exited=No'],
)
plt.title('Customer Exited Prediction Visualization')
plt.xlabel('Tenure')
plt.ylabel('No of Customers')
plt.legend()

In [None]:
def get_unique_val(df):
    """Print ``<column>: <unique values>`` for every object-dtype column of ``df``.

    Columns of any other dtype are skipped. Nothing is returned.
    """
    object_columns = (name for name, dtype in df.dtypes.items() if dtype == 'object')
    for name in object_columns:
        print(f'{name}: {df[name].unique()}')

# Print the distinct values of each object-dtype column in df.
get_unique_val(df)

In [None]:
# One-hot encode Geography and Gender.
# dtype is pinned to uint8: pandas >= 2.0 emits bool dummy columns by default,
# and a frame mixing bool with float columns becomes an object array when
# converted for Keras, which cannot be turned into a tensor.
df1 = pd.get_dummies(df, columns=['Geography', 'Gender'], dtype='uint8')
df1.sample(3)

In [None]:
# Dtypes after one-hot encoding.
df1.dtypes

In [None]:
# Rescale the listed numeric columns with MinMaxScaler, overwriting them in df1.
# NOTE(review): CustomerId is scaled and kept as a model feature; it looks like an
# identifier rather than a predictor - confirm whether it should be dropped
# (doing so would also change the hard-coded input width of the models below).
# NOTE(review): the scaler is fitted on the full frame before any train/test split,
# so test rows influence the scaling parameters.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scale_cols = ['CustomerId', 'CreditScore', 'Age', 'Balance', 'Tenure', 'NumOfProducts', 'EstimatedSalary']
df1[scale_cols] = scaler.fit_transform(df1[scale_cols])
df1.shape

In [None]:
# Features are every column except the target; the target is Exited.
X, y = df1.drop(columns='Exited'), df1['Exited']

In [None]:
from sklearn.model_selection import train_test_split

# 80/20 split with a fixed seed (no stratification for this baseline).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=5,
)


In [None]:
import tensorflow as tf 
from tensorflow import keras

In [None]:
# Baseline dense network: 14 input features -> 14 -> 8 -> 1.
model = keras.Sequential([
    keras.layers.Dense(14, input_shape=(14,), activation='relu'),
    keras.layers.Dense(8, activation='relu'),
    # Sigmoid bounds the output to (0, 1) so it is a valid probability for
    # binary_crossentropy; a ReLU output is unbounded above and can be exactly 0.
    keras.layers.Dense(1, activation='sigmoid')
])
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)
model.fit(X_train, y_train, epochs=100)

In [None]:
# Loss and accuracy (the compiled metric) on the held-out test split.
model.evaluate(X_test, y_test)

In [None]:
# Raw model outputs for the test split; they are thresholded into labels in the next cell.
yp = model.predict(X_test)
yp

In [None]:
# Turn each raw model output into a hard label: 1 when it exceeds 0.49, else 0.
y_pred = [1 if score > 0.49 else 0 for score in yp]

In [None]:
# First ten true labels, for comparison with the predictions in the next cell.
y_test[:10]

In [None]:
# First ten thresholded predictions.
y_pred[:10]

In [None]:
# Classification report for both classes (0 and 1) on the test split.
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred, labels=[0, 1]))

In [None]:
import seaborn as sn

# Confusion matrix of true vs. predicted labels, drawn as an annotated heatmap.
cm = tf.math.confusion_matrix(labels=y_test, predictions=y_pred)

plt.figure(figsize=(10, 7))
sn.heatmap(cm, annot=True, fmt='d')
plt.xlabel('Predicted')
plt.ylabel('True')

In [None]:
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
def ANN(X_train, y_train, X_test, y_test, loss, weights, epochs=100):
    """Train a small dense binary classifier and report its test performance.

    Builds a ``10 -> 7 -> 1`` network (ReLU hidden layers, sigmoid output),
    fits it on the training data, evaluates it on the test data and prints a
    classification report for classes 0 and 1.

    Parameters
    ----------
    X_train, y_train
        Training features and labels passed to ``model.fit``. The input width
        of the network is taken from ``X_train.shape[1]``.
    X_test, y_test
        Held-out features and labels used for ``model.evaluate`` and the report.
    loss
        Loss passed to ``model.compile`` (e.g. ``'binary_crossentropy'``).
    weights
        Class-weight mapping forwarded to ``model.fit(class_weight=...)``.
        ``-1`` (the existing sentinel) or ``None`` trains without class weights.
    epochs
        Number of training epochs; defaults to 100.

    Returns
    -------
    The rounded output of ``model.predict(X_test)`` (values 0.0 / 1.0).
    """
    # Size the input layer from the data instead of hard-coding the column count.
    n_features = X_train.shape[1]
    model = keras.Sequential([
        keras.layers.Dense(10, input_shape=(n_features,), activation='relu'),
        keras.layers.Dense(7, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])

    # class_weight=None is Keras' default, so one fit call covers both cases.
    no_weights = weights is None or (not isinstance(weights, dict) and weights == -1)
    model.fit(X_train, y_train, epochs=epochs, class_weight=None if no_weights else weights)

    # Called for its printed loss/accuracy; the return value is not needed.
    model.evaluate(X_test, y_test)

    y_pred = np.round(model.predict(X_test))
    print('Classification Report: \n', classification_report(y_test, y_pred, labels=[0, 1]))
    return y_pred

Handling Imbalanced Data

In [None]:
# One frame per class of the Exited target.
df1_count_0 = df1.loc[df1['Exited'] == 0]
df1_count_1 = df1.loc[df1['Exited'] == 1]

# value_counts() lists the most frequent class first, so the first count is the larger one.
count_val_0, count_val_1 = df1['Exited'].value_counts()
df1.shape

UNDERSAMPLING THE MAJORITY

In [None]:
# Shapes of the class-0 and class-1 frames.
df1_count_0.shape, df1_count_1.shape

In [None]:
# Undersample class 0 down to the number of class-1 rows and stack the two.
# The sample size is read from the class-1 frame itself rather than from
# count_val_1, which other cells rebind; random_state makes the draw repeatable.
df1_under = pd.concat([
    df1_count_1,
    df1_count_0.sample(n=len(df1_count_1), random_state=5),
])
df1_under.shape

In [None]:
# Recount the classes after undersampling (the two counts are expected to be equal).
# Note that this rebinds count_val_0 / count_val_1.
count_val_0, count_val_1 = df1_under['Exited'].value_counts()
count_val_0, count_val_1

In [None]:
# Features / target for the undersampled frame.
y = df1_under['Exited']
X = df1_under.drop(columns='Exited')

In [None]:
# 80/20 split of the undersampled data; stratify=y keeps the class ratio the same in both parts.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=5, stratify=y)
X_train.shape

In [None]:
# Train and evaluate on the undersampled split; -1 means "no class weights" in ANN().
y_pred = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)

In [None]:
# Same report that ANN() prints after evaluation, shown again in its own cell.
print('Classification Report: \n', classification_report(y_test, y_pred, labels=[0, 1]))

OVERSAMPLING THE MINORITY

In [None]:
# Rebuild the per-class frames from the full (un-resampled) data.
df1_count_0 = df1.loc[df1['Exited'] == 0]
df1_count_1 = df1.loc[df1['Exited'] == 1]

# value_counts() lists the most frequent class first, so the first count is the larger one.
count_val_0, count_val_1 = df1['Exited'].value_counts()

In [None]:
# Oversample class 1 (with replacement) up to the number of class-0 rows.
# The target size is read from the class-0 frame itself rather than from
# count_val_0, which other cells rebind; random_state makes the draw repeatable.
# NOTE(review): rows are duplicated before the train/test split that follows,
# so copies of the same row can land in both the training and the test set.
df1_over = pd.concat([
    df1_count_1.sample(n=len(df1_count_0), replace=True, random_state=5),
    df1_count_0,
])
df1_over.shape

In [None]:
# Show the class balance after oversampling.
# Displayed directly instead of being stored in count_val_1, so that name keeps
# holding an integer count for the cells that use it as a sample size.
df1_over['Exited'].value_counts()

In [None]:
# Features / target for the oversampled frame.
X, y = df1_over.drop(columns='Exited'), df1_over['Exited']

In [None]:
# 80/20 stratified split of the oversampled data.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=5, stratify=y)
X_train.shape

In [None]:
# Train and evaluate on the oversampled split; -1 means "no class weights" in ANN().
y_pred = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)

In [None]:
# Same report that ANN() prints after evaluation, shown again in its own cell.
print('Classification Report: \n', classification_report(y_test, y_pred, labels=[0, 1]))

SMOTE

In [None]:
# Start again from the full encoded frame: features are all columns but Exited.
y = df1['Exited']
X = df1.drop(columns='Exited')

In [None]:
from imblearn.over_sampling import SMOTE

# Synthesize minority-class samples with SMOTE; random_state makes the
# generated samples (and therefore every downstream metric) reproducible.
# NOTE(review): resampling happens before the train/test split that follows,
# so synthetic samples also end up in the test set.
smote = SMOTE(sampling_strategy='minority', random_state=15)
X_sm, y_sm = smote.fit_resample(X, y)
y_sm.value_counts()

In [None]:
# 80/20 stratified split of the SMOTE-resampled data.
from sklearn.model_selection import train_test_split 
X_train, X_test, y_train, y_test = train_test_split(X_sm,y_sm, test_size=0.2, random_state=15, stratify=y_sm)

In [None]:
# Train and evaluate on the SMOTE split; -1 means "no class weights" in ANN().
y_pred = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)

In [None]:
# Same report that ANN() prints after evaluation, shown again in its own cell.
print('Classification Report: \n', classification_report(y_test, y_pred, labels=[0, 1]))

ENSEMBLE

In [None]:
# Ensemble section: split the original (un-resampled) data first, stratified on the target.
X = df1.drop('Exited', axis=1)
y= df1['Exited']
from sklearn.model_selection import train_test_split 
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=15, stratify=y)

In [None]:
# Re-attach the labels to the training features, then separate the two classes.
df3 = X_train.assign(Exited=y_train)
df3_class_0 = df3.loc[df3['Exited'] == 0]
df3_class_1 = df3.loc[df3['Exited'] == 1]

In [None]:
# Class counts within the training portion only.
df3['Exited'].value_counts()

In [None]:
# Shapes of the class-1 frame, the class-0 frame and the combined training frame.
df3_class_1.shape, df3_class_0.shape, df3.shape

In [None]:
def get_train_data(df_majority, df_minority, start, end):
    """Build one training batch from a slice of the majority frame plus the whole minority frame.

    Rows ``start:end`` of ``df_majority`` are stacked on top of all rows of
    ``df_minority``. Returns ``(features, target)`` where the target is the
    ``Exited`` column and the features are every other column.
    """
    majority_batch = df_majority[start:end]
    batch = pd.concat([majority_batch, df_minority])
    return batch.drop(columns='Exited'), batch['Exited']

In [None]:
# Ensemble member 1: class-0 training rows [0:2000) plus every class-1 training row.
X_train, y_train = get_train_data(df3_class_0, df3_class_1, 0, 2000)
y_pred1 = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)

In [None]:
# Report for ensemble member 1 (ANN() has already printed the same report).
print('Classification Report: \n', classification_report(y_test, y_pred1, labels=[0, 1]))

In [None]:
# Ensemble member 2: class-0 training rows [2000:4000) plus every class-1 training row.
X_train, y_train = get_train_data(df3_class_0, df3_class_1, 2000, 4000)
y_pred2 = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)

In [None]:
# Report for ensemble member 2 (ANN() has already printed the same report).
print('Classification Report: \n', classification_report(y_test, y_pred2, labels=[0, 1]))

In [None]:
print('Classification Report: \n', classification_report(y_test, y_pred3, labels=[0, 1]))

In [None]:
X_train, y_train = get_train_data(df3_class_0, df3_class_1, 4000, 6000)
y_pred3 = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)

In [None]:
print('Classification Report: \n', classification_report(y_test, y_pred3, labels=[0, 1]))

In [None]:
# Ensemble member 4: class-0 training rows [6000:8000) plus every class-1 training row.
# NOTE(review): if df3_class_0 has fewer than 8000 rows this slice is shorter than
# the previous batches - confirm against the shapes printed above.
X_train, y_train = get_train_data(df3_class_0, df3_class_1, 6000, 8000)
y_pred4 = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)

In [None]:
# Report for ensemble member 4 (ANN() has already printed the same report).
# NOTE(review): the four prediction sets are only reported individually; no
# combined (e.g. majority-vote) prediction is computed in the visible cells.
print('Classification Report: \n', classification_report(y_test, y_pred4, labels=[0, 1]))