In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

%matplotlib inline

In [3]:
df=pd.read_csv("Churn_Modelling.csv")

In [4]:
X= df.drop(['CustomerId','Surname','Exited'],axis=1)
y = df['Exited']

In [5]:
X.head()

Unnamed: 0,RowNumber,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,1,619,France,Female,42,2,0.0,1,1,1,101348.88
1,2,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,3,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,4,699,France,Female,39,1,0.0,2,0,0,93826.63
4,5,850,Spain,Female,43,2,125510.82,1,1,1,79084.1


In [6]:
print("Categories in 'Gender' variable :=>     ",end=" " )
print(df['Gender'].unique())

print("Categories in 'Geography' variable :=>  ",end=" ")
print(df['Geography'].unique())


Categories in 'Gender' variable :=>      ['Female' 'Male']
Categories in 'Geography' variable :=>   ['France' 'Spain' 'Germany']


In [7]:
# Create Column Transformer with 2 types of transformers
num_features = X.select_dtypes(exclude="object").columns
cat_features = X.select_dtypes(include="object").columns

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

numeric_transformer = StandardScaler()
oh_transformer = OneHotEncoder()

preprocessor = ColumnTransformer(
    [
        ("OneHotEncoder", oh_transformer, cat_features),
         ("StandardScaler", numeric_transformer, num_features),        
    ]
)

In [8]:
X=preprocessor.fit_transform(X)

In [9]:
X.shape

(10000, 14)

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.25,random_state=42)

In [11]:
X_train.shape,X_test.shape

((7500, 14), (2500, 14))

In [22]:
from sklearn.metrics import f1_score, recall_score, accuracy_score
def evaluate_model(true, predicted):
    f1_score = f1_score(true,predicted)
    recall_score = recall_score(true,predicted)
    accuracy_score=accuracy_score(true,predicted)
    confusion_matrix = confusion_matrix(true, predicted)
    return f1_score, recall_score, accuracy_score, confusion_matrix

In [23]:
models = {
    "Logistic Regression": LogisticRegression(),
    "K-Neighbors Classifier": KNeighborsClassifier(),
    "Decision Tree Classifier": DecisionTreeClassifier(),
    "Random Forest Classifier": RandomForestClassifier(),
    "XGBRegressor": XGBClassifier(), 
    "SVR": SVC(),
    "CatBoostClassifier":CatBoostClassifier(),
    "AdaBoost Classifier": AdaBoostClassifier(),
    "GradientBoostingClassifier": GradientBoostingClassifier()
}
model_list = []
accuracy_list =[]

for i in range(len(list(models))):
    model = list(models.values())[i]
    model.fit(X_train, y_train) # Train model

    # Make predictions
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)
    
    # Evaluate Train and Test dataset
    model_train_f1 , model_train_recall, model_train_accuracy, model_train_matrix = evaluate_model(y_train, y_train_pred)
    model_test_f1 , model_test_recall, model_test_accuracy, model_test_matrix = evaluate_model(y_test, y_test_pred)

    
    print(list(models.keys())[i])
    model = models[list(models.keys())[i]]


    print('Model performance for Training set')
    print("- F1_score: {:.4f}".format(model_train_f1))
    print("- Recall_score: {:.4f}".format(model_train_recall))
    print("- Accuracy_score: {:.4f}".format(model_train_accuracy))
    print("- Confusion Matrix: \n", model_train_matrix)

    print('----------------------------------')
    
    print('Model performance for Test set')
    print("- F1_score: {:.4f}".format(model_test_f1))
    print("- Recall_score: {:.4f}".format(model_test_recall))
    print("- Accuracy_score: {:.4f}".format(model_test_accuracy))
    print("- Confusion Matrix: \n", model_test_matrix)

    accuracy_list.append(model_test_accuracy)
    
    print('='*35)
    print('\n')

UnboundLocalError: local variable 'f1_score' referenced before assignment

In [25]:
from sklearn.metrics import f1_score, recall_score, accuracy_score


models = {
    "Logistic Regression": LogisticRegression(),
    "K-Neighbors Classifier": KNeighborsClassifier(),
    "Decision Tree Classifier": DecisionTreeClassifier(),
    "Random Forest Classifier": RandomForestClassifier(),
    "XGBRegressor": XGBClassifier(),  # Corrected to XGBClassifier
    "SVR": SVC(),
    "CatBoostClassifier": CatBoostClassifier(),
    "AdaBoost Classifier": AdaBoostClassifier(),
    "GradientBoostingClassifier": GradientBoostingClassifier()
}

model_list = []
accuracy_list = []

def evaluate_model(true_labels, predicted_labels):
    """Calculates F1-score, recall, accuracy, and confusion matrix."""
    f1 = f1_score(true_labels, predicted_labels)
    recall = recall_score(true_labels, predicted_labels)
    accuracy = accuracy_score(true_labels, predicted_labels)
    # ... (calculate confusion matrix if needed)  # Add confusion matrix calculation here
    return f1, recall, accuracy

for model_name, model in models.items():
    model.fit(X_train, y_train)  # Train model

    # Make predictions
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Evaluate model performance
    model_train_f1, model_train_recall, model_train_accuracy = evaluate_model(y_train, y_train_pred)
    model_test_f1, model_test_recall, model_test_accuracy = evaluate_model(y_test, y_test_pred)

    print(model_name)
    model_list.append(model_name)

    print('Model performance for Training set')
    print("- F1_score: {:.4f}".format(model_train_f1))
    print("- Recall_score: {:.4f}".format(model_train_recall))
    print("- Accuracy_score: {:.4f}".format(model_train_accuracy))

    print('----------------------------------')

    print('Model performance for Test set')
    print("- F1_score: {:.4f}".format(model_test_f1))
    print("- Recall_score: {:.4f}".format(model_test_recall))
    print("- Accuracy_score: {:.4f}".format(model_test_accuracy))

    accuracy_list.append(model_test_accuracy)

    print('=' * 35)
    print('\n')


Logistic Regression
Model performance for Training set
- F1_score: 0.3208
- Recall_score: 0.2178
- Accuracy_score: 0.8105
----------------------------------
Model performance for Test set
- F1_score: 0.2968
- Recall_score: 0.2036
- Accuracy_score: 0.8105


K-Neighbors Classifier
Model performance for Training set
- F1_score: 0.6239
- Recall_score: 0.5085
- Accuracy_score: 0.8740
----------------------------------
Model performance for Test set
- F1_score: 0.4658
- Recall_score: 0.3817
- Accuracy_score: 0.8280


Decision Tree Classifier
Model performance for Training set
- F1_score: 1.0000
- Recall_score: 1.0000
- Accuracy_score: 1.0000
----------------------------------
Model performance for Test set
- F1_score: 0.5071
- Recall_score: 0.5445
- Accuracy_score: 0.7920


Random Forest Classifier
Model performance for Training set
- F1_score: 1.0000
- Recall_score: 1.0000
- Accuracy_score: 1.0000
----------------------------------
Model performance for Test set
- F1_score: 0.5787
- Recall_



AdaBoost Classifier
Model performance for Training set
- F1_score: 0.5852
- Recall_score: 0.4848
- Accuracy_score: 0.8588
----------------------------------
Model performance for Test set
- F1_score: 0.5825
- Recall_score: 0.4987
- Accuracy_score: 0.8595


GradientBoostingClassifier
Model performance for Training set
- F1_score: 0.6160
- Recall_score: 0.4957
- Accuracy_score: 0.8730
----------------------------------
Model performance for Test set
- F1_score: 0.5798
- Recall_score: 0.4758
- Accuracy_score: 0.8645


