In [43]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load the dataset
df = pd.read_csv('Churnnew1.csv')

# Handle missing values if any
df = df.dropna()

# Define the selected columns for independent variables (X)
selected_columns = ['Gender', 'Senior Citizen', 'Partner', 'Dependents', 'tenure', 'Phone Service', 'Monthly Charges', 'Total Charges','Payment Method']

# Convert categorical variables to numerical using label encoding
label_encoders = {}
for eachcol in selected_columns:
    if eachcol in ['Gender', 'Senior Citizen', 'Partner', 'Dependents', 'Phone Service', 'Payment Method']:
        le = LabelEncoder()
        df[eachcol] = le.fit_transform(df[eachcol])
        label_encoders[eachcol] = le
#label_encoders[col] = le: After label encoding is applied to the column, the LabelEncoder object le is stored in a dictionary called label_encoders.
#The key in the label_encoders dictionary is the column name, and the associated value is the LabelEncoder object(le).

In [39]:
df

Unnamed: 0,Gender,Senior Citizen,Partner,Dependents,tenure,Phone Service,Monthly Charges,Total Charges,Payment Method,Churn
0,0,0,1,0,1,0,29.85,29.85,2,No
1,0,0,1,0,1,0,29.85,29.85,2,No
2,1,0,0,0,34,1,56.95,1889.5,3,No
3,1,0,0,0,2,1,53.85,108.15,3,Yes
4,1,0,0,0,45,0,42.3,1840.75,0,No
5,0,0,0,0,2,1,70.7,151.65,2,Yes
6,0,0,0,0,8,1,99.65,820.5,2,Yes
7,1,0,0,1,22,1,89.1,1949.4,1,No
8,0,0,0,0,10,0,29.75,301.9,3,No
9,0,0,1,0,28,1,104.8,3046.05,2,Yes


In [40]:
# Use the selected columns as independent variables (X)
X = df[selected_columns]

# Target variable (y)
y = df['Churn']

# Convert target variable to numeric (assuming it's binary)
y = y.map({'Yes': 1, 'No': 0})

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)






In [41]:
# Create a neural network model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1069dad4490>

In [42]:
# Evaluate the model
y_pred = model.predict(X_test)
y_pred_binary = (y_pred > 0.5).astype(int)

accuracy = accuracy_score(y_test, y_pred_binary)
conf_matrix = confusion_matrix(y_test, y_pred_binary)
classification_rep = classification_report(y_test, y_pred_binary)

print(f"Test Accuracy: {accuracy:.4f}")
print("Confusion Matrix:")
print(conf_matrix)
print("Classification Report:")
print(classification_rep)

# Save the model
model.save('churn_prediction_model.h5')

# Save label encoders for future use
import joblib
joblib.dump(label_encoders, 'label_encoders.pkl')

Test Accuracy: 0.8000
Confusion Matrix:
[[4 0]
 [1 0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.80      1.00      0.89         4
           1       0.00      0.00      0.00         1

    accuracy                           0.80         5
   macro avg       0.40      0.50      0.44         5
weighted avg       0.64      0.80      0.71         5



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


['label_encoders.pkl']

In [45]:
import pandas as pd
import numpy as np
import tensorflow as tf
import joblib

# Load the label encoders
label_encoders = joblib.load('label_encoders.pkl')

# Load the trained model
model = tf.keras.models.load_model('churn_prediction_model.h5')

# Create a dictionary with new values for independent variables
# Female,0,Yes,Yes,69,Yes,113.25,7895.15,No

new_data = {
    'Gender': ['Female'],
    'Senior Citizen': [0],
    'Partner': ['Yes'],
    'Dependents': ['Yes'],
    'tenure': [69],
    'Phone Service': ['Yes'],
    'Monthly Charges': [1122.25],
    'Total Charges': [1113.15],
    'Payment Method': ['Mailed check']
}

# Create a DataFrame from the new data
new_df = pd.DataFrame(new_data)

# Apply label encoding to the new data
selected_columns = ['Gender', 'Senior Citizen', 'Partner', 'Dependents', 'tenure', 'Phone Service', 'Monthly Charges', 'Total Charges','Payment Method']

for colum in selected_columns:
    if colum in label_encoders:
        le = label_encoders[colum]
        new_df[colum] = le.transform(new_df[colum])

# Make predictions on the new data
new_X = new_df[selected_columns]
new_predictions = model.predict(new_X)

print(new_predictions)

# Convert predictions to binary format (0 or 1) based on a threshold (e.g., 0.5)
new_predictions_binary = (new_predictions > 0.5).astype(int)
print(new_predictions_binary)

# Display the predictions
print("Predicted Churn (1 for Churned, 0 for Not Churned):")
print(new_predictions_binary[0])


[[0.]]
[[0]]
Predicted Churn (1 for Churned, 0 for Not Churned):
[0]
