In [1]:
import pandas as pd
import numpy as np
import os
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from imblearn.combine import SMOTEENN
from sklearn.ensemble import RandomForestClassifier

In [2]:
os.chdir("E:\\Projects\\Protfolio_Project\\bank+marketing\\bank")

In [3]:
df=pd.read_csv('Bank_Update.csv')

In [4]:
df.head()

Unnamed: 0,age,balance,day,duration,campaign,pdays,previous,y,job_admin.,job_blue-collar,...,poutcome_unknown,age_group_10 - 19,age_group_20 - 29,age_group_30 - 39,age_group_40 - 49,age_group_50 - 59,age_group_60 - 69,age_group_70 - 79,age_group_80 - 89,age_group_90 - 99
0,58,2143,5,261,1,-1,0,0,0,0,...,1,0,0,0,0,1,0,0,0,0
1,44,29,5,151,1,-1,0,0,0,0,...,1,0,0,0,1,0,0,0,0,0
2,33,2,5,76,1,-1,0,0,0,0,...,1,0,0,1,0,0,0,0,0,0
3,47,1506,5,92,1,-1,0,0,0,1,...,1,0,0,0,1,0,0,0,0,0
4,33,1,5,198,1,-1,0,0,0,0,...,1,0,0,1,0,0,0,0,0,0


In [5]:
x=df.drop('y',axis=1)
y=df['y']

**Train Test Split**

In [6]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)

**Decision Tree Classifier**

In [7]:
model_dt=DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)

In [8]:
model_dt.fit(x_train,y_train)

DecisionTreeClassifier(max_depth=6, min_samples_leaf=8, random_state=100)

In [9]:
y_pred=model_dt.predict(x_test)

In [10]:
model_dt.score(x_test,y_test)

0.8989273471193188

In [11]:
print(classification_report(y_test, y_pred, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.92      0.97      0.94      7994
           1       0.61      0.36      0.45      1049

    accuracy                           0.90      9043
   macro avg       0.77      0.66      0.70      9043
weighted avg       0.88      0.90      0.89      9043



**Low accuracy is observed in the model.**

**In imbalanced datasets, accuracy is not a reliable metric.**

**Imbalanced datasets skew the accuracy measure, making it deceptive for model evaluation.**

**it's necessary to focus on the precision, recall, and f1-score for Class 1, indicating churned customers, as these scores are considerably lower (precision: 0.65, recall: 0.38, f1-score: 0.48). These lower scores for the minority class demonstrate the model's limitations in correctly identifying and capturing the churned customers in the dataset.**

#  moving ahead to call SMOTEENN (UpSampling + ENN)

In [12]:
sm = SMOTEENN()
X_resampled, y_resampled = sm.fit_resample(x, y)


In [13]:
xr_train,xr_test,yr_train,yr_test=train_test_split(X_resampled, y_resampled,test_size=0.2)

In [14]:
model_dt_smote=DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)

In [15]:
model_dt_smote.fit(xr_train,yr_train)
yr_predict = model_dt_smote.predict(xr_test)
model_score_r = model_dt_smote.score(xr_test, yr_test)
print(model_score_r)
print(metrics.classification_report(yr_test, yr_predict))

0.9168877099911583
              precision    recall  f1-score   support

           0       0.89      0.93      0.91      5545
           1       0.94      0.91      0.92      6896

    accuracy                           0.92     12441
   macro avg       0.92      0.92      0.92     12441
weighted avg       0.92      0.92      0.92     12441



In [16]:
print(metrics.confusion_matrix(yr_test, yr_predict))

[[5138  407]
 [ 627 6269]]


**observing the confusion matrix, the model exhibits promising results with an accuracy of 92%. Impressively, the recall, precision, and f1-score for the minority class demonstrate significant improvemen**

**Random Forest Classifier**

In [17]:
model_rf=RandomForestClassifier(n_estimators=100, criterion='gini', random_state = 100,max_depth=6, min_samples_leaf=8)

In [18]:
model_rf.fit(x_train,y_train)
y_pred=model_rf.predict(x_test)
model_rf.score(x_test,y_test)
print(classification_report(y_test, y_pred, labels=[0,1]))


              precision    recall  f1-score   support

           0       0.90      0.99      0.94      7994
           1       0.76      0.14      0.23      1049

    accuracy                           0.89      9043
   macro avg       0.83      0.56      0.59      9043
weighted avg       0.88      0.89      0.86      9043



In [19]:
from imblearn.combine import SMOTEENN

sm = SMOTEENN()
X_resampled1, y_resampled1 = sm.fit_resample(x, y)


In [20]:
xr_train1,xr_test1,yr_train1,yr_test1=train_test_split(X_resampled1, y_resampled1,test_size=0.2)

In [21]:
model_rf_smote=RandomForestClassifier(n_estimators=100, criterion='gini', random_state = 100,max_depth=6, min_samples_leaf=8)

In [22]:
model_rf_smote.fit(xr_train1,yr_train1)

RandomForestClassifier(max_depth=6, min_samples_leaf=8, random_state=100)

In [23]:
yr_predict1 = model_rf_smote.predict(xr_test1)

In [24]:
model_score_r1 = model_rf_smote.score(xr_test1, yr_test1)

In [25]:
print(model_score_r1)
print(metrics.classification_report(yr_test1, yr_predict1))

0.9465832531280077
              precision    recall  f1-score   support

           0       0.96      0.92      0.94      5534
           1       0.94      0.97      0.95      6934

    accuracy                           0.95     12468
   macro avg       0.95      0.94      0.95     12468
weighted avg       0.95      0.95      0.95     12468



In [26]:
print(metrics.confusion_matrix(yr_test1, yr_predict1))

[[5071  463]
 [ 203 6731]]
