In [137]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.layers import Dense
from keras.models import Sequential
from sklearn.compose import ColumnTransformer
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.preprocessing import StandardScaler


In [138]:
# Load the customer churn data. The path is relative to the notebook's
# working directory.
churn_data_file_path = os.path.join('input', 'Churn_Modelling.csv')
dataset = pd.read_csv(churn_data_file_path)

# Preview the first rows to sanity-check the columns.
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [139]:
# Predictor columns, selected by name so the selection does not depend on
# the column order of the CSV.
features = [
    'CreditScore', 'Geography', 'Gender', 'Age', 'Tenure', 'Balance',
    'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary',
]
# X holds the features, y the target column ('Exited').
# .copy() makes X an independent frame, so modifying its columns later does
# not trigger pandas' SettingWithCopyWarning on a selection of `dataset`.
X = dataset[features].copy()
y = dataset['Exited']


In [140]:
# Encoding categorical data
# Gender -> integer codes. LabelEncoder sorts its classes, so
# 'Female' => 0 and 'Male' => 1.
# assign() returns a new frame rather than writing into X in place, which
# avoids pandas' SettingWithCopyWarning when X is a selection of `dataset`.
le = LabelEncoder()
X = X.assign(Gender=le.fit_transform(X['Gender']))

# One-hot encode column 1 ('Geography'). ColumnTransformer places the encoded
# dummy columns first; the passthrough columns follow on the right.
ct = ColumnTransformer([('my_OHE', OneHotEncoder(), [1])], remainder='passthrough')
X = ct.fit_transform(X)

# Drop column 0, which is the first Geography dummy (not CreditScore): it is
# implied by the remaining dummies, so keeping it would be redundant
# (dummy-variable trap).
X = X[:, 1:]

# Split data into training and validation data, for both features and target.
train_X, val_X, train_y, val_y = train_test_split(X, y, test_size = 0.2, random_state = 0)

# Apply feature scaling. The scaler is fitted on the training split only and
# then reused for the validation split, so no validation statistics leak
# into training.
sc = StandardScaler()
train_X = sc.fit_transform(train_X)
val_X = sc.transform(val_X)

In [141]:
# Part 2 - Now let's make the ANN!
# Initialising the ANN
model = Sequential()

# Architecture: two hidden layers of 6 ReLU units each, followed by a single
# sigmoid output unit.
# The input width is read from the training matrix instead of being
# hard-coded, so the model stays in sync if the feature set changes.
n_features = train_X.shape[1]
model.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = n_features))
model.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
model.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))

# compile: "Configures the model for training"
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

# Fitting the ANN to the Training set.
# 10 epochs keeps the run short; an earlier configuration used epochs=100.
model.fit(train_X, train_y, batch_size=10, epochs=10)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1c294716700>

In [153]:
# Predicting the validation set results.
# Keep the raw model outputs under their own name, then threshold at 0.5 to
# turn the floats into boolean class labels.
val_probabilities = model.predict(val_X)
val_predictions = (val_probabilities > 0.5)

# Making the Confusion Matrix (rows: true class, columns: predicted class).
# confusion_matrix is imported with the other dependencies at the top.
cm = confusion_matrix(val_y, val_predictions)

print(cm)

[[1551   44]
 [ 275  130]]
