In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
%matplotlib inline


In [None]:
# Customer-churn CSV, read directly from its raw GitHub URL.
DATA_URL = "https://raw.githubusercontent.com/codebasics/deep-learning-keras-tf-tutorial/refs/heads/master/11_chrun_prediction/customer_churn.csv"
df = pd.read_csv(DATA_URL)

In [None]:
# Drop the customerID column (presumably a row identifier rather than a feature).
# Rebinding with errors="ignore" instead of inplace=True keeps the cell safe to
# re-run: a second execution is a no-op rather than a KeyError.
df = df.drop(columns="customerID", errors="ignore")

In [None]:
# Parse TotalCharges as numbers; entries that cannot be parsed become NaN
# (errors='coerce') and are then filled with the mean of the parsed values.
total_charges = pd.to_numeric(df["TotalCharges"], errors='coerce')
df["TotalCharges"] = total_charges.fillna(total_charges.mean())

In [None]:
# Check the dtype of TotalCharges after the numeric conversion.
df["TotalCharges"].dtypes

In [None]:
# Tenure distribution, split by churn outcome.
# (The unused blood_sugar_* sample lists that used to sit in this cell were
# leftovers unrelated to the churn data and have been removed.)
tenure_churn_no = df[df.Churn == 'No'].tenure
tenure_churn_yes = df[df.Churn == 'Yes'].tenure

plt.xlabel("tenure")
plt.ylabel("Number Of Customers")
plt.title("Customer Churn Prediction Visualization")

plt.hist([tenure_churn_yes, tenure_churn_no], rwidth=0.95,
         color=['green', 'red'], label=['Churn=Yes', 'Churn=No'])
plt.legend()

In [None]:
# Monthly-charges distribution, split by churn outcome.
# (The unused blood_sugar_* sample lists that used to sit in this cell were
# leftovers unrelated to the churn data and have been removed.)
mc_churn_no = df[df.Churn == 'No'].MonthlyCharges
mc_churn_yes = df[df.Churn == 'Yes'].MonthlyCharges

plt.xlabel("Monthly Charges")
plt.ylabel("Number Of Customers")
plt.title("Customer Churn Prediction Visualization")

plt.hist([mc_churn_yes, mc_churn_no], rwidth=0.95, color=[
         'green', 'red'], label=['Churn=Yes', 'Churn=No'])
plt.legend()

In [None]:
def print_unique_col_values(df):
    """Print the distinct values of every object-dtype column of ``df``.

    One line is printed per matching column, formatted as
    ``<column name>: <array of unique values>``. Columns whose dtype is not
    ``object`` are skipped. Returns ``None``.
    """
    object_columns = (name for name in df if df[name].dtypes == 'object')
    for name in object_columns:
        print(f'{name}: {df[name].unique()}')

In [None]:
# List the distinct values of each object-dtype column before any recoding.
print_unique_col_values(df)

In [None]:
# Fold the two "No ... service" labels into a plain 'No' across the whole frame.
no_service_labels = ['No internet service', 'No phone service']
df.replace(no_service_labels, 'No', inplace=True)

In [None]:
# Re-list the object-dtype columns to check the effect of the label replacement.
print_unique_col_values(df)

In [None]:
yes_no_columns = ['Partner', 'Dependents', 'PhoneService', 'MultipleLines', 'OnlineSecurity', 'OnlineBackup',
                  'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'PaperlessBilling', 'Churn']
# Encode every binary column with a single vectorised replace instead of one
# in-place replace per column. infer_objects() then converts the result to an
# integer dtype explicitly (the same pattern the gender column uses) rather
# than relying on replace()'s deprecated silent downcasting.
df[yes_no_columns] = (
    df[yes_no_columns]
    .replace({'Yes': 1, 'No': 0})
    .infer_objects(copy=False)
)

In [None]:
# Show the distinct values of every column in df, whatever its dtype.
for col in df.columns:
    unique_values = df[col].unique()
    print(f'{col}: {unique_values}')

In [None]:
# Map gender labels to integers: Male -> 1, Female -> 0.
gender_codes = {'Male': 1, 'Female': 0}
df['gender'] = df['gender'].replace(gender_codes).infer_objects(copy=False)

In [None]:
# Distinct gender values after encoding.
df['gender'].unique()

In [None]:
# One-hot encode the remaining multi-valued categorical columns.
# dtype=int: pandas >= 2.0 produces bool dummy columns by default, which leaves
# the feature frame with a mix of bool and numeric columns; integer 0/1 dummies
# keep every feature numeric for scaling, resampling and model training.
df2 = pd.get_dummies(
    data=df,
    columns=['InternetService', 'Contract', 'PaymentMethod'],
    dtype=int,
)
df2.columns

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Rescale the three continuous columns with MinMaxScaler; the scaler is fitted
# on all rows of df2.
cols_to_scale = ['tenure', 'MonthlyCharges', 'TotalCharges']

scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df2[cols_to_scale])
df2[cols_to_scale] = scaled_values

In [None]:
# Show the distinct values of every column in df2 after encoding and scaling.
for col in df2.columns:
    unique_values = df2[col].unique()
    print(f'{col}: {unique_values}')

In [None]:

from sklearn.model_selection import train_test_split
# Features are every column except the Churn target.
X = df2.drop(columns='Churn')
y = df2['Churn']

# Hold out 20% of the rows for testing; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=5
)

In [None]:
# Number of feature columns in the training frame.
X_train.shape[1]

In [None]:
import tensorflow as tf
from tensorflow import keras


# Size the input layer from the training data instead of hard-coding 26, so the
# model keeps working if the set of encoded feature columns changes.
n_features = X_train.shape[1]

# Two ReLU hidden layers (26 and 15 units) followed by a single sigmoid output.
model = keras.Sequential([
    keras.layers.Input(shape=(n_features,)),
    keras.layers.Dense(26, activation='relu'),
    keras.layers.Dense(15, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])

In [None]:
# Adam optimiser with binary cross-entropy loss; accuracy is tracked as a metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Fit the compiled model on the training split for 5 epochs.
model.fit(X_train, y_train, epochs=5)

In [None]:
# NOTE: the Yes/No -> 1/0 encoding loop that used to live in this cell was an
# exact repeat of the earlier encoding cell. By this point `df` is already
# encoded, and training/evaluation use frames derived from `df2`, so the repeat
# changed nothing and has been removed.

In [None]:
# Evaluate on the held-out test split (loss plus the accuracy metric set at compile time).
model.evaluate(X_test, y_test)

In [None]:
# Raw outputs of the model's sigmoid layer for the test split; peek at the first five.
yp = model.predict(X_test)
yp[:5]

In [None]:
# Threshold each predicted score: strictly above 0.5 -> 1, otherwise 0.
y_pred = [1 if element > 0.5 else 0 for element in yp]

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Classification report for the thresholded predictions on the test split.
print(classification_report(y_test, y_pred))

In [None]:
import seaborn as sn
# Confusion matrix of true labels vs. thresholded predictions, drawn as an
# annotated heatmap with integer cell labels.
cm = tf.math.confusion_matrix(labels=y_test, predictions=y_pred)

fig, ax = plt.subplots(figsize=(10, 7))
sn.heatmap(cm, annot=True, fmt='d', ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Truth')

In [None]:
def ANN(X_train, y_train, X_test, y_test, loss, weights=-1, epochs=100):
    """Train a small feed-forward binary classifier and report test-set metrics.

    A fresh ``keras.Sequential`` model is built on every call (Dense 26 ReLU ->
    Dense 15 ReLU -> Dense 1 sigmoid), compiled with the Adam optimiser and the
    given ``loss``, fitted on the training data, and used to predict
    ``X_test``. A classification report comparing ``y_test`` with the rounded
    predictions is printed.

    Parameters
    ----------
    X_train, y_train
        Training features and labels. ``X_train`` must expose ``.shape`` with
        the feature count in position 1 (e.g. a DataFrame or 2-D array).
    X_test, y_test
        Features and labels used for the printed report.
    loss
        Loss passed unchanged to ``model.compile``.
    weights
        Optional class-weight mapping forwarded to ``model.fit`` as
        ``class_weight``. The legacy sentinel ``-1`` (the default) and ``None``
        both mean "train without class weights".
    epochs
        Number of training epochs (default 100, the previously hard-coded value).

    Returns
    -------
    The model's predictions for ``X_test`` rounded with ``np.round`` and cast
    to ``int``.
    """
    # Derive the input width from the data rather than hard-coding 26, so the
    # function works for any number of feature columns.
    n_features = X_train.shape[1]

    model = keras.Sequential([
        keras.Input(shape=(n_features,)),
        keras.layers.Dense(26, activation='relu'),
        keras.layers.Dense(15, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam',
                  loss=loss,
                  metrics=['accuracy'])

    # Only treat an actual integer -1 (or None) as "no weights"; the type check
    # avoids comparing a dict/array of weights against -1 with ==.
    no_weights = weights is None or (
        isinstance(weights, (int, np.integer)) and weights == -1
    )
    fit_kwargs = {} if no_weights else {'class_weight': weights}
    model.fit(X_train, y_train, epochs=epochs, **fit_kwargs)

    y_preds = model.predict(X_test)
    y_preds = np.round(y_preds).astype(int)
    print("Classification Report:\n", classification_report(y_test, y_preds))

    return y_preds

In [None]:
# Baseline run on the split created above, before any resampling is applied.
y_preds = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy')

In [None]:
# Look the counts up by class label instead of unpacking value_counts()
# positionally: value_counts() orders by frequency, so positional unpacking
# would silently swap the two counts if class 1 ever outnumbered class 0.
churn_counts = df2.Churn.value_counts()
count_class_0 = int(churn_counts.loc[0])
count_class_1 = int(churn_counts.loc[1])

# Rows of each class, used by the resampling cells below.
df_class_0 = df2[df2.Churn == 0]
df_class_1 = df2[df2.Churn == 1]

In [None]:
# Display the row counts for Churn == 0 and Churn == 1.
count_class_0, count_class_1

In [None]:
# Peek at two randomly chosen Churn == 0 rows.
df_class_0.sample(n=2)

In [None]:
# Random under-sampling: draw as many Churn == 0 rows as there are Churn == 1
# rows. random_state pins the draw so a fresh Restart & Run All reproduces the
# same subset (it was previously unseeded).
df_class_0_under = df_class_0.sample(count_class_1, random_state=15)
df_test_under = pd.concat([df_class_0_under, df_class_1], axis=0)
df_test_under.Churn.value_counts()

In [None]:
# Features / target for the under-sampled frame.
X = df_test_under.drop(columns='Churn')
y = df_test_under['Churn']

In [None]:
# 80/20 split, stratified on y so both parts keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=15, stratify=y
)

In [None]:
# Train/evaluate on the under-sampled split. Assign the result (as the other
# ANN calls do) so the cell does not echo the entire prediction array.
y_preds = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy')

In [None]:
# Display the two class counts again before over-sampling.
count_class_0, count_class_1

In [None]:
# (rows, columns) of the Churn == 1 subset.
df_class_1.shape

In [None]:
# Random over-sampling: draw Churn == 1 rows with replacement until there are
# count_class_0 of them. random_state pins the draw for reproducible re-runs
# (it was previously unseeded).
# NOTE(review): the over-sampled frame is split into train/test afterwards, so
# copies of the same row can land on both sides and test metrics are likely
# optimistic — consider over-sampling only the training portion.
df_class_1_over = df_class_1.sample(count_class_0, replace=True, random_state=15)

In [None]:
# (rows, columns) of the over-sampled Churn == 1 frame.
df_class_1_over.shape

In [None]:
# Stack all Churn == 0 rows on top of the over-sampled Churn == 1 rows.
over_sampled_parts = [df_class_0, df_class_1_over]
df_test_over = pd.concat(over_sampled_parts)

In [None]:
# Features / target for the over-sampled frame.
X = df_test_over.drop(columns='Churn')
y = df_test_over['Churn']

# 80/20 split, stratified on y.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=15, stratify=y
)


In [None]:
# Train/evaluate on the over-sampled split; -1 is ANN's "no class weights" sentinel.
y_preds = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)

In [None]:
# Go back to the full encoded frame for the next resampling strategy.
X = df2.drop(columns='Churn')
y = df2['Churn']

In [None]:
from imblearn.over_sampling import SMOTE

# Over-sample the minority class with SMOTE. random_state makes the resampled
# rows reproducible across Restart & Run All (it was previously unseeded).
# NOTE(review): resampling happens before the train/test split in the next
# cell, so resampled rows can end up in the test set — consider resampling the
# training portion only.
smote = SMOTE(sampling_strategy="minority", random_state=5)
X_resampled, y_resampled = smote.fit_resample(X, y)

y_resampled.value_counts()

In [None]:
# 80/20 split of the SMOTE-resampled data, stratified on the resampled labels.
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled,
    y_resampled,
    test_size=0.2,
    random_state=5,
    stratify=y_resampled,
)

In [None]:
# Train/evaluate on the SMOTE-resampled split; -1 is ANN's "no class weights" sentinel.
y_pred = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)

In [None]:
# Class counts in the full encoded frame.
df2['Churn'].value_counts()

In [None]:
# Features / target from the full encoded frame for the batch-wise runs below.
X = df2.drop(columns='Churn')
y = df2['Churn']

In [None]:
# 80/20 split, stratified on y.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=5, stratify=y
)

In [None]:
# Re-attach the target to a copy of the training features so rows can be
# filtered by class.
df3 = X_train.assign(Churn=y_train)

In [None]:
# Training rows separated by class label.
df3_class0 = df3[df3['Churn'] == 0]
df3_class1 = df3[df3['Churn'] == 1]

In [None]:
def get_train_batch(df_majority, df_minority, start, end):
    """Build one training batch from a slice of one frame plus all of another.

    Rows ``start:end`` of ``df_majority`` are stacked on top of every row of
    ``df_minority``. Both frames must contain a ``'Churn'`` column.

    Returns a ``(features, target)`` pair: the stacked frame without the
    ``'Churn'`` column, and the ``'Churn'`` column itself.
    """
    majority_slice = df_majority[start:end]
    batch = pd.concat([majority_slice, df_minority], axis=0)

    features = batch.drop(columns='Churn')
    target = batch['Churn']
    return features, target


In [None]:
# Batch 1: Churn == 0 training rows [0, 1495) plus all Churn == 1 training rows.
# NOTE(review): 1495 is presumably the number of Churn == 1 rows in df3 —
# confirm, and consider deriving it from len(df3_class1) instead of hard-coding.
X_train,y_train = get_train_batch(df3_class0, df3_class1, 0, 1495)
y_pred1 = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)

In [None]:
# Batch 2: Churn == 0 training rows [1495, 2990) plus all Churn == 1 training rows.
# NOTE(review): the hard-coded boundaries presumably step by the Churn == 1 row
# count — confirm against len(df3_class1).
X_train, y_train = get_train_batch(df3_class0, df3_class1, 1495, 2990)
y_pred2 = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)

In [None]:
# Batch 3: Churn == 0 training rows [2990, 4485) plus all Churn == 1 training rows.
# NOTE(review): if df3_class0 has fewer than 4485 rows this slice is simply
# shorter than the previous two — confirm that is intended.
X_train, y_train = get_train_batch(df3_class0, df3_class1, 2990, 4485)
y_pred3 = ANN(X_train, y_train, X_test, y_test, 'binary_crossentropy', -1)
