In [23]:
import pandas as pd
import tensorflow as tf

from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder

In [24]:
data = pd.read_csv('customer_churn.csv')

In [25]:
data['TotalCharges'].dtype

dtype('O')

In [26]:
data.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [27]:
data.isnull().sum()

customerID          0
gender              0
SeniorCitizen       0
Partner             0
Dependents          0
tenure              0
PhoneService        0
MultipleLines       0
InternetService     0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
Contract            0
PaperlessBilling    0
PaymentMethod       0
MonthlyCharges      0
TotalCharges        0
Churn               0
dtype: int64

In [28]:
columns_to_encode = ['gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
                     'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
                     'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract',
                     'PaperlessBilling', 'PaymentMethod', 'Churn']

label_encoders = {}

for column in columns_to_encode:
    label_encoder = tf.keras.layers.StringLookup(
        vocabulary=data[column].unique(), mask_token=None, num_oov_indices=0
    )
    data[column] = label_encoder(data[column])
    label_encoders[column] = label_encoder

print(data.head())



   customerID  gender  SeniorCitizen  Partner  Dependents  tenure  \
0  7590-VHVEG       0              0        0           0       1   
1  5575-GNVDE       1              0        1           0      34   
2  3668-QPYBK       1              0        1           0       2   
3  7795-CFOCW       1              0        1           0      45   
4  9237-HQITU       0              0        1           0       2   

   PhoneService  MultipleLines  InternetService  OnlineSecurity  ...  \
0             0              0                0               0  ...   
1             1              1                0               1  ...   
2             1              1                0               1  ...   
3             0              0                0               1  ...   
4             1              1                1               0  ...   

   DeviceProtection  TechSupport  StreamingTV  StreamingMovies  Contract  \
0                 0            0            0                0         0   


In [29]:
data.describe()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,Churn
count,7043.0,7043.0,7043.0,7043.0,7043.0,7043.0,7043.0,7043.0,7043.0,7043.0,7043.0,7043.0,7043.0,7043.0,7043.0,7043.0,7043.0,7043.0,7043.0
mean,0.504756,0.162147,0.516967,0.299588,32.371149,0.903166,1.325004,0.872923,0.720006,0.871788,0.777226,0.723555,0.817691,0.821241,0.690473,0.407781,1.315633,64.761692,0.26537
std,0.500013,0.368612,0.499748,0.45811,24.559481,0.295752,0.64273,0.737796,0.796885,0.738369,0.778826,0.795896,0.763212,0.761725,0.833755,0.491457,1.148907,30.090047,0.441561
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.25,0.0
25%,0.0,0.0,0.0,0.0,9.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35.5,0.0
50%,1.0,0.0,1.0,0.0,29.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,70.35,0.0
75%,1.0,0.0,1.0,1.0,55.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,89.85,1.0
max,1.0,1.0,1.0,1.0,72.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,3.0,118.75,1.0


In [30]:
data['TotalCharges'] = pd.to_numeric(data['TotalCharges'], errors='coerce', downcast='integer')

In [31]:
data['tenure'] = data['tenure'] / 72
data['MonthlyCharges'] = data['MonthlyCharges'] / 118.75
data['TotalCharges'] = data ['TotalCharges'] / 8684.799805

In [32]:
data = data.drop(columns = ['customerID'])

In [33]:
data.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,0,0,0,0,0.013889,0,0,0,0,0,0,0,0,0,0,0,0,0.251368,0.003437,0
1,1,0,1,0,0.472222,1,1,0,1,1,1,0,0,0,1,1,1,0.479579,0.217564,0
2,1,0,1,0,0.027778,1,1,0,1,0,0,0,0,0,0,0,1,0.453474,0.012453,1
3,1,0,1,0,0.625,0,0,0,1,1,1,1,0,0,1,1,2,0.356211,0.211951,0
4,0,0,1,0,0.027778,1,1,1,0,1,0,0,0,0,0,0,0,0.595368,0.017462,1


In [34]:
data.shape

(7043, 20)

Test-Train Split

In [35]:
import numpy as np

x = data.iloc[:, :-1]
y = data.iloc[:, -1]

x.shape, y.shape

((7043, 19), (7043,))

In [36]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

Model Building

In [37]:
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

model = Sequential ([
    Dense(20),
    Dense(25, activation = 'relu'),
    Dense(15, activation = 'relu'),
    Dense(1, activation = 'sigmoid')
])

In [38]:
import keras

model.compile(
    loss = keras.losses.BinaryCrossentropy(),
    optimizer = keras.optimizers.Adam(lr = 0.001),
    metrics = 'accuracy',
)



In [39]:
x_train.isnull().sum()

gender               0
SeniorCitizen        0
Partner              0
Dependents           0
tenure               0
PhoneService         0
MultipleLines        0
InternetService      0
OnlineSecurity       0
OnlineBackup         0
DeviceProtection     0
TechSupport          0
StreamingTV          0
StreamingMovies      0
Contract             0
PaperlessBilling     0
PaymentMethod        0
MonthlyCharges       0
TotalCharges        10
dtype: int64

In [41]:
model.fit(x_train, y_train, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7f87f35558a0>

In [42]:
model.fit(x_test, y_test, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7f87f356e620>