In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn import svm


# load dataset
df = pd.read_csv("BankChurners.csv")

print(df.head())

feature_cols = ['Customer_Age', 'Gender',
       'Dependent_count', 'Education_Level', 'Marital_Status',
       'Income_Category', 'Card_Category', 'Months_on_book',
       'Total_Relationship_Count', 'Months_Inactive_12_mon',
       'Contacts_Count_12_mon', 'Credit_Limit', 'Total_Revolving_Bal',
       'Avg_Open_To_Buy', 'Total_Amt_Chng_Q4_Q1', 'Total_Trans_Amt',
       'Total_Trans_Ct', 'Total_Ct_Chng_Q4_Q1', 'Avg_Utilization_Ratio',
       'Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1',
       'Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2']



   CLIENTNUM     Attrition_Flag  Customer_Age Gender  Dependent_count  \
0  768805383  Existing Customer            45      M                3   
1  818770008  Existing Customer            49      F                5   
2  713982108  Existing Customer            51      M                3   
3  769911858  Existing Customer            40      F                4   
4  709106358  Existing Customer            40      M                3   

  Education_Level Marital_Status Income_Category Card_Category  \
0     High School        Married     $60K - $80K          Blue   
1        Graduate         Single  Less than $40K          Blue   
2        Graduate        Married    $80K - $120K          Blue   
3     High School        Unknown  Less than $40K          Blue   
4      Uneducated        Married     $60K - $80K          Blue   

   Months_on_book  ...  Credit_Limit  Total_Revolving_Bal  Avg_Open_To_Buy  \
0              39  ...       12691.0                  777          11914.0   
1       

In [2]:
#preprocess
df['Attrition_Flag'] = df['Attrition_Flag'].replace({'Existing Customer':0, 'Attrited Customer':1})
df.drop( "CLIENTNUM", inplace = True, axis = 1 )
df['Gender'] = df['Gender'].replace({'M':0, 'F':1})
df['Education_Level'] = df['Education_Level'].replace({'High School':0, 'Graduate':1, 'Uneducated':2, 'Unknown':3, 'College':4, 'Post-Graduate':5, 'Doctorate':6})
df['Marital_Status'] = df['Marital_Status'].replace({'Married':0, 'Single':1, 'Unknown':2, 'Divorced':3})
df['Income_Category'] = df['Income_Category'].replace({'$60K - $80K':0, 'Less than $40K':1, '$80K - $120K':2, '$40K - $60K':3, '$120K +':4, 'Unknown':5})
df['Card_Category'] = df['Card_Category'].replace({'Blue':0, 'Gold':1, 'Silver':2, 'Platinum':3})
df.isnull().sum() 

Attrition_Flag                                                                                                                        0
Customer_Age                                                                                                                          0
Gender                                                                                                                                0
Dependent_count                                                                                                                       0
Education_Level                                                                                                                       0
Marital_Status                                                                                                                        0
Income_Category                                                                                                                       0
Card_Category                                   

In [3]:
X = df[feature_cols] # Features
y = df.Attrition_Flag # Target variable

# Split dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1) # 70% training and 30% test

print(X_train.head())

      Customer_Age  Gender  Dependent_count  Education_Level  Marital_Status  \
8284            45       1                4                4               2   
9863            29       0                1                1               3   
5350            55       1                2                3               1   
9641            51       1                3                2               0   
5914            47       1                3                0               1   

      Income_Category  Card_Category  Months_on_book  \
8284                1              0              40   
9863                1              0              36   
5350                5              0              36   
9641                1              0              33   
5914                1              0              31   

      Total_Relationship_Count  Months_Inactive_12_mon  ...  Credit_Limit  \
8284                         1                       2  ...        1795.0   
9863                        

In [51]:
svcf = svm.SVC(kernel='linear', probability=True)
svcf.fit(X_train,y_train)
y_pred = svcf.predict(X_test)

y_pred_prob = svcf.predict_proba(X_test)[:,1]

print(confusion_matrix(y_test, y_pred))
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print ("AUC Score:", roc_auc_score(y_test, y_pred))
print ("AUC Score prob:", roc_auc_score(y_test, y_pred_prob))
print ("Precision:", precision_score(y_test, y_pred))
print ("Recall:", recall_score(y_test, y_pred))
print ("F1 Score:", f1_score(y_test, y_pred))



[[2537   14]
 [  83  405]]
Accuracy: 0.9680816057913787
AUC Score: 0.9122149944412671
AUC Score prob: 0.9793901138094352
Precision: 0.9665871121718377
Recall: 0.8299180327868853
F1 Score: 0.8930540242557882


In [6]:
svcf = svm.SVC(kernel='linear', C=0.1, probability=True)
svcf.fit(X_train,y_train)
y_pred = svcf.predict(X_test)

y_pred_prob = svcf.predict_proba(X_test)[:,1]

print(confusion_matrix(y_test, y_pred))
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print ("AUC Score:", roc_auc_score(y_test, y_pred))
print ("AUC Score prob:", roc_auc_score(y_test, y_pred_prob))
print ("Precision:", precision_score(y_test, y_pred))
print ("Recall:", recall_score(y_test, y_pred))
print ("F1 Score:", f1_score(y_test, y_pred))

[[2536   15]
 [  84  404]]
Accuracy: 0.9674234945705824
AUC Score: 0.91099440270932
AUC Score prob: 0.9789085443831093
Precision: 0.964200477326969
Recall: 0.8278688524590164
F1 Score: 0.8908489525909593


In [5]:
svcf = svm.SVC(kernel='linear', C=10, probability=True)
svcf.fit(X_train,y_train)
y_pred = svcf.predict(X_test)

y_pred_prob = svcf.predict_proba(X_test)[:,1]

print(confusion_matrix(y_test, y_pred))
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print ("AUC Score:", roc_auc_score(y_test, y_pred))
print ("AUC Score prob:", roc_auc_score(y_test, y_pred_prob))
print ("Precision:", precision_score(y_test, y_pred))
print ("Recall:", recall_score(y_test, y_pred))
print ("F1 Score:", f1_score(y_test, y_pred))

[[2534   17]
 [  89  399]]
Accuracy: 0.9651201052977953
AUC Score: 0.9054794487536229
AUC Score prob: 0.9750628168959778
Precision: 0.9591346153846154
Recall: 0.8176229508196722
F1 Score: 0.8827433628318584


In [4]:
svcf = svm.SVC(kernel='poly', probability=True)
svcf.fit(X_train,y_train)
y_pred = svcf.predict(X_test)

y_pred_prob = svcf.predict_proba(X_test)[:,1]

print(confusion_matrix(y_test, y_pred))
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print ("AUC Score:", roc_auc_score(y_test, y_pred))
print ("AUC Score prob:", roc_auc_score(y_test, y_pred_prob))


[[2551    0]
 [ 488    0]]
Accuracy: 0.8394208621256992
AUC Score: 0.5
AUC Score prob: 0.7031178708446061


In [5]:
svcf = svm.SVC(kernel='poly', degree = 2, probability=True)
svcf.fit(X_train,y_train)
y_pred = svcf.predict(X_test)

y_pred_prob = svcf.predict_proba(X_test)[:,1]

print(confusion_matrix(y_test, y_pred))
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print ("AUC Score:", roc_auc_score(y_test, y_pred))
print ("AUC Score prob:", roc_auc_score(y_test, y_pred_prob))


[[2551    0]
 [ 488    0]]
Accuracy: 0.8394208621256992
AUC Score: 0.5
AUC Score prob: 0.7479845576469529


In [4]:
svcf = svm.SVC(kernel='rbf', probability=True)
svcf.fit(X_train,y_train)
y_pred = svcf.predict(X_test)

y_pred_prob = svcf.predict_proba(X_test)[:,1]

print(confusion_matrix(y_test, y_pred))
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print ("AUC Score:", roc_auc_score(y_test, y_pred))
print ("AUC Score prob:", roc_auc_score(y_test, y_pred_prob))


[[2551    0]
 [ 488    0]]
Accuracy: 0.8394208621256992
AUC Score: 0.5
AUC Score prob: 0.8317712115467415
