<a href="https://colab.research.google.com/github/uzeziogho/Private-Projects/blob/main/Customer_churn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# prompt: Generate code for Logistic regression, XGBoost and Neural Network with this dataset /content/WA_Fn-UseC_-Telco-Customer-Churn.csv managing imbalance

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import xgboost as xgb
from imblearn.over_sampling import SMOTE
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Load the dataset
try:
  data = pd.read_csv('/content/WA_Fn-UseC_-Telco-Customer-Churn.csv')
except FileNotFoundError:
  print("Error: File not found. Please make sure the file path is correct.")
  exit()


# Preprocessing
def preprocess_data(df):
    df.drop('customerID', axis=1, inplace=True)
    df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
    df['TotalCharges'].fillna(0, inplace=True)

    categorical_cols = df.select_dtypes(include='object').columns
    numerical_cols = df.select_dtypes(include=np.number).columns

    le = LabelEncoder()
    for col in categorical_cols:
        df[col] = le.fit_transform(df[col])

    X = df.drop('Churn', axis=1)
    y = df['Churn']

    return X, y


X, y = preprocess_data(data)

# Handle class imbalance using SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)


# Scale numerical features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


# Logistic Regression
logreg = LogisticRegression(solver = 'liblinear')
logreg.fit(X_train, y_train)
y_pred_logreg = logreg.predict(X_test)
print("Logistic Regression:")
print(classification_report(y_test, y_pred_logreg))
print("Accuracy:", accuracy_score(y_test, y_pred_logreg))


# XGBoost
xgb_model = xgb.XGBClassifier(random_state=42, use_label_encoder =False, eval_metric='logloss')
xgb_model.fit(X_train, y_train)
y_pred_xgb = xgb_model.predict(X_test)
print("\nXGBoost:")
print(classification_report(y_test, y_pred_xgb))
print("Accuracy:", accuracy_score(y_test, y_pred_xgb))


# Neural Network
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0) # Verbose set to 0 to suppress output

y_pred_nn = (model.predict(X_test) > 0.5).astype("int32")
print("\nNeural Network:")
print(classification_report(y_test, y_pred_nn))
print("Accuracy:", accuracy_score(y_test, y_pred_nn))

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['TotalCharges'].fillna(0, inplace=True)


Logistic Regression:
              precision    recall  f1-score   support

           0       0.84      0.77      0.80      1021
           1       0.80      0.85      0.82      1049

    accuracy                           0.81      2070
   macro avg       0.82      0.81      0.81      2070
weighted avg       0.81      0.81      0.81      2070

Accuracy: 0.8135265700483092


Parameters: { "use_label_encoder" } are not used.

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



XGBoost:
              precision    recall  f1-score   support

           0       0.85      0.84      0.85      1021
           1       0.85      0.86      0.85      1049

    accuracy                           0.85      2070
   macro avg       0.85      0.85      0.85      2070
weighted avg       0.85      0.85      0.85      2070

Accuracy: 0.8502415458937198
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Neural Network:
              precision    recall  f1-score   support

           0       0.82      0.85      0.83      1021
           1       0.85      0.82      0.83      1049

    accuracy                           0.83      2070
   macro avg       0.83      0.83      0.83      2070
weighted avg       0.83      0.83      0.83      2070

Accuracy: 0.8309178743961353
