In [57]:
import pandas as pd
import numpy as np
df = pd.read_csv("Churn_Modelling.csv")
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [58]:
df.describe()

Unnamed: 0,RowNumber,CustomerId,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,5000.5,15690940.0,650.5288,38.9218,5.0128,76485.889288,1.5302,0.7055,0.5151,100090.239881,0.2037
std,2886.89568,71936.19,96.653299,10.487806,2.892174,62397.405202,0.581654,0.45584,0.499797,57510.492818,0.402769
min,1.0,15565700.0,350.0,18.0,0.0,0.0,1.0,0.0,0.0,11.58,0.0
25%,2500.75,15628530.0,584.0,32.0,3.0,0.0,1.0,0.0,0.0,51002.11,0.0
50%,5000.5,15690740.0,652.0,37.0,5.0,97198.54,1.0,1.0,1.0,100193.915,0.0
75%,7500.25,15753230.0,718.0,44.0,7.0,127644.24,2.0,1.0,1.0,149388.2475,0.0
max,10000.0,15815690.0,850.0,92.0,10.0,250898.09,4.0,1.0,1.0,199992.48,1.0


In [59]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder

#Delete irrelevant columns
df = df.drop(['RowNumber','CustomerId','Surname'],axis =1)

#Label Encode the categorical columns
label_encoder = LabelEncoder()
df['Geography'] = label_encoder.fit_transform(df['Geography'])
df['Gender'] = label_encoder.fit_transform(df['Gender'])

#Split dataset into X and Y (target variable)(Y) and features(X)
x = df.drop('Exited',axis = 1)
y = df['Exited']

#Split the dataset into train,validation and testing sets(70:15:15)
x_train,x_temp,y_train,y_temp = train_test_split(x,y,test_size = 0.3,random_state=42)
x_val,x_test,y_val,y_test = train_test_split(x_temp,y_temp,test_size = 0.5,random_state = 42)

#Standardize numerical features
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_val = scaler.fit_transform(x_val)
x_test = scaler.fit_transform(x_test)




In [80]:
from tensorflow import keras
from sklearn.metrics import accuracy_score, precision_score, recall_score
from tensorflow.keras.models import save_model
from sklearn.preprocessing import LabelEncoder
import joblib


# Define a function to create, compile, and train the model
def build_train_model(hidden_layers, units_per_layer, epochs, activation, regularization, learning_rate):
    model = keras.Sequential()

    # Add hidden layers with specified units and activation functions
    for i in range(hidden_layers):
        model.add(keras.layers.Dense(units_per_layer[i], activation=activation))

    # Add output layer
    model.add(keras.layers.Dense(1, activation='sigmoid'))

    # Compile the model
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Train the model
    history = model.fit(x_train, y_train, epochs=epochs, validation_data=(x_val, y_val), verbose=0)

    # Evaluate the model on the validation set
    y_pred = (model.predict(x_val) > 0.5).astype("int32")

    # Calculate metrics
    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred)
    recall = recall_score(y_val, y_pred)
    
    
    tf.keras.models.save_model(model,'churn.hdf5')
    

  # Assuming 'geography' is the column containing Geography data in your training set
    label_encoder_geography = LabelEncoder()
    label_encoder_geography.fit(df['Geography'])

# Save the label encoder to a file
    joblib.dump(label_encoder_geography, 'label_encoder_geography.joblib')
    
  

 # Assuming 'gender' is the column containing Gender data in your training set
    label_encoder_gender = LabelEncoder()
    label_encoder_gender.fit(df['Gender'])

# Save the label encoder to a file
    joblib.dump(label_encoder_gender, 'label_encoder_gender.joblib')



    return accuracy, precision, recall, history

# Experiment setups
experiments = [
    {'hidden_layers': 1, 'units_per_layer': [128], 'epochs': 30, 'activation': 'sigmoid', 'regularization': None, 'learning_rate': 0.01},
    {'hidden_layers': 2, 'units_per_layer': [128, 64], 'epochs': 20, 'activation': 'sigmoid', 'regularization': None, 'learning_rate': 0.01},
    {'hidden_layers': 3, 'units_per_layer': [128, 64, 32], 'epochs': 10, 'activation': 'sigmoid', 'regularization': None, 'learning_rate': 0.01},
    # Add other experiment setups
]

# Run experiments
results = []
for exp in experiments:
    acc, prec, rec, hist = build_train_model(exp['hidden_layers'], exp['units_per_layer'], exp['epochs'], exp['activation'], exp['regularization'], exp['learning_rate'])
    results.append({'Hidden Layers': exp['hidden_layers'], 'Units per Layer': exp['units_per_layer'], 'Epochs': exp['epochs'],
                    'Activation': exp['activation'], 'Regularization': exp['regularization'], 'Learning Rate': exp['learning_rate'],
                    'Accuracy': acc, 'Precision': prec, 'Recall': rec, 'History': hist})


    

# Tabulate the results
tabulated_results = pd.DataFrame(results)
print(tabulated_results)




<IPython.core.display.Javascript object>

  tf.keras.models.save_model(model,'churn.hdf5')




<IPython.core.display.Javascript object>

  tf.keras.models.save_model(model,'churn.hdf5')




<IPython.core.display.Javascript object>

   Hidden Layers Units per Layer  Epochs Activation Regularization  \
0              1           [128]      30    sigmoid           None   
1              2       [128, 64]      20    sigmoid           None   
2              3   [128, 64, 32]      10    sigmoid           None   

   Learning Rate  Accuracy  Precision    Recall  \
0           0.01  0.867333   0.703349  0.517606   
1           0.01  0.864000   0.680180  0.531690   
2           0.01  0.863333   0.682028  0.521127   

                                             History  
0  <keras.src.callbacks.History object at 0x00000...  
1  <keras.src.callbacks.History object at 0x00000...  
2  <keras.src.callbacks.History object at 0x00000...  


  tf.keras.models.save_model(model,'churn.hdf5')


Overwriting app.py


In [86]:

%%writefile app.py
import streamlit as st
import tensorflow as tf

# Replace 'path/to/your/model.h5' with the actual path to your model
model = tf.keras.models.load_model('churn.hdf5')

# ... use the model for prediction


# Model details (customize based on your model)
MODEL_INPUT_FEATURES = [
    "credit_score", "geography", "gender", "age", "tenure", "balance",
    "num_of_products", "has_cr_card", "is_active_member", "estimated_salary"
]
MODEL_OUTPUT_FEATURE = "churn_probability"  # Or "churn_label" if binary

# App title and header
st.title("Customer Churn Prediction App")
st.subheader("Using an Artificial Neural Network Model")

# User input form
user_input = {}
for feature in MODEL_INPUT_FEATURES:
    if feature == "credit_score":
        user_input[feature] = st.slider(feature.title(), 300, 850, 600)
    elif feature == "geography":
        user_input[feature] = st.selectbox(feature.title(), ["France","Germany","Spain"])
    elif feature == "gender":
        user_input[feature] = st.radio(feature.title(), ["Male", "Female"])
    elif feature == "age":
        user_input[feature] = st.number_input(feature.title(), min_value=18)
    elif feature == "tenure":
        user_input[feature] = st.number_input(feature.title(), min_value=0)
    elif feature == "balance":
        user_input[feature] = st.number_input(feature.title(), min_value=0)
    elif feature == "num_of_products":
        user_input[feature] = st.number_input(feature.title(), min_value=0)
    elif feature == "has_cr_card":
        user_input[feature] = st.checkbox(feature.title())
    elif feature == "is_active_member":
        user_input[feature] = st.checkbox(feature.title())
    elif feature == "estimated_salary":
        user_input[feature] = st.number_input(feature.title(), min_value=0)

# Prediction function
def predict_churn(user_input):
    # Preprocess data if needed (e.g., one-hot encode categorical features)
    # ... (Ensure preprocessing matches your model's requirements)

    # Make prediction using your model
    prediction = your_model.predict(user_input[MODEL_INPUT_FEATURES])

    return prediction[0][MODEL_OUTPUT_FEATURE]

# Predict button and result display
if st.button("Predict Churn"):
    prediction = predict_churn(user_input)
    if prediction >= 0.5:  # Adjust threshold as needed based on model
        st.error("Customer is likely to churn.")
    else:
        st.success("Customer is unlikely to churn.")
    st.write(f"Churn probability: {prediction:.2f}")

# Additional insights or visualizations (optional)
# ... (Consider adding charts or explanations based on model outputs)


# ...



Overwriting app.py
