# 0. Import Data

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [4]:
data_frame = pd.read_csv('Churn.csv')
# reads comma-separated values (csv) file into DataFrame

In [13]:
X = pd.get_dummies(data_frame.drop(['Churn', 'Customer ID'], axis=1))
# get_dummies converts categorical variable into dummy/indicator values
# .drop drops specified labels from rows or columns of DataFrame
# axis=1 indicates we are dropping the whole column
y = data_frame['Churn'].apply(lambda x: 1 if x=='Yes' else 0)
# .apply called on the 'churn' series of the DataFrame invokes a function
# on the values of the series. we pass in a lambda function as a parameter
# A lambda function is a small anonymous function. each element in the 'churn'
# series will be run through the function. churn will be represented as a 1 or 0

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)
# Split arrays or matrices into random train and test subsets.
# we pass in the X DataFrame and the y churn series. we use 20% of
# the data as the test split

In [8]:
X_train.head() # view first 5 elements in X_train

Unnamed: 0,Senior Citizen,tenure,Monthly Charges,Gender_Female,Gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,Phone Service_No,...,Total Charges_995.35,Total Charges_996.45,Total Charges_996.85,Total Charges_996.95,Total Charges_997.65,Total Charges_997.75,Total Charges_998.1,Total Charges_999.45,Total Charges_999.8,Total Charges_999.9
5815,0,16,20.6,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
644,1,49,89.85,1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4254,0,22,88.75,0,1,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
5918,0,4,19.95,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4377,0,45,100.3,1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [9]:
y_train.head()

5815    0
644     0
4254    0
5918    0
4377    0
Name: Churn, dtype: int64

# 1. Import Dependencies

In [17]:
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score

# 2. Build and Compile Model

In [18]:
model = Sequential()
# Sequential groups a linear stack of layers into a tf.keras.Model.
model.add(Dense(units=32, activation='relu', input_dim=len(X_train.columns)))
# add a densely-connected NN layer with 32 neurons
# relu activation function: Applies the rectified linear unit activation function.
# basically runs max(x, 0) on the neuron output data 
model.add(Dense(units=64, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))
# final layer shape dictates what the output will look like. One unit
# means we will only get one number back. (yes or no)

In [19]:
model.compile(loss='binary_crossentropy', optimizer='sgd', metrics='accuracy')
# configures the model for training 
# the loss is the sum of how inaccurate the model's estimations are
# the optimizer is the strategy by which the model tries to reduce it's loss
# metrics allows us to evaluate how well the model is performing

# 3. Fit, Predict and Evaluate

In [20]:
model.fit(X_train, y_train, epochs=200, batch_size=32)
# TRAINING THE MODEL; X is the input data, y is the target data
# 200 epochs (iterations on a dataset)
# Epoch setting: training for longer (i.e. higher epochs) will often
# result in a more accurate model, but might lead to overfitting

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.callbacks.History at 0x1fc6aee2eb8>

In [22]:
y_hat = model.predict(X_test)
y_hat = [0 if val<0.5 else 1 for val in y_hat]
# we want a binary value instead of a float between 0 and 1



In [23]:
y_hat

[0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [24]:
accuracy_score(y_test, y_hat)

0.7828246983676366

# 4. Saving and Reloading

In [25]:
model.save('tfmodel')

INFO:tensorflow:Assets written to: tfmodel\assets


In [None]:
# if you wanted to reload a model again:
# del model
# model = load_model('tfmodel')