# HEAD

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression,LogisticRegression
import seaborn as sns
%matplotlib inline
from sklearn import metrics
from sklearn.model_selection import train_test_split,KFold,GridSearchCV,RandomizedSearchCV
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import max_error, mean_absolute_error as mae, median_absolute_error as medae, mean_squared_log_error as msle
from sklearn.metrics import r2_score,plot_confusion_matrix
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, classification_report
import cufflinks as cf
from sklearn.metrics import roc_curve, roc_auc_score, auc
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import AdaBoostClassifier,GradientBoostingClassifier,RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from IPython.display import Image
from sklearn.tree import export_graphviz,DecisionTreeRegressor
from imblearn.over_sampling import SMOTE
from eli5 import show_weights
from eli5.sklearn import PermutationImportance
from sklearn.utils import resample


The sklearn.metrics.scorer module is  deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.metrics. Anything that cannot be imported from sklearn.metrics is now part of the private API.


The sklearn.feature_selection.base module is  deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.feature_selection. Anything that cannot be imported from sklearn.feature_selection is now part of the private API.



In [2]:
from jupyterthemes import jtplot
jtplot.style(theme='monokai')

In [3]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

In [13]:
df = pd.read_csv('CreditCardDataset_cleaned.csv')
df

Unnamed: 0,Offer_Accepted,Reward,Mailer_Type,Income_Level,Bank_Accnt_Open,Overdraft_Protection,Credit_Rating,Credit_Cards_Held,Homes_Owned,Household_Size,Own_Your_Home,Average_Balance,Q1_Balance,Q2_Balance,Q3_Balance,Q4_Balance
0,0,Air Miles,Letter,High,1,No,3,2,1,4,No,1160.75,1669.0,877.0,1095.0,1002.0
1,0,Air Miles,Letter,Medium,1,No,2,2,2,5,Yes,147.25,39.0,106.0,78.0,366.0
2,0,Air Miles,Postcard,High,2,No,2,2,1,2,Yes,276.50,367.0,352.0,145.0,242.0
3,0,Air Miles,Letter,Medium,2,No,3,1,1,4,No,1219.00,1578.0,1760.0,1119.0,419.0
4,0,Air Miles,Letter,Medium,1,No,2,2,1,6,Yes,1211.00,2140.0,1357.0,982.0,365.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17971,0,Cash Back,Letter,High,1,No,1,1,1,5,Yes,167.50,136.0,65.0,71.0,398.0
17972,0,Cash Back,Letter,High,1,No,1,3,1,3,Yes,850.50,984.0,940.0,943.0,535.0
17973,0,Cash Back,Letter,High,1,No,1,2,1,4,No,1087.25,918.0,767.0,1170.0,1494.0
17974,0,Cash Back,Letter,Medium,1,No,2,4,2,2,Yes,1022.25,626.0,983.0,865.0,1615.0


# Making the dummy data

In [15]:
reward = pd.get_dummies(df['Reward'],prefix='Reward')
mailer = pd.get_dummies(df['Mailer_Type'],prefix='mailer')
income = pd.get_dummies(df['Income_Level'],prefix='income')
overdraw = pd.get_dummies(df['Overdraft_Protection'],prefix='overdraw')
creditcard = pd.get_dummies(df['Credit_Cards_Held'],prefix='CC')
homes = pd.get_dummies(df['Homes_Owned'],prefix='hold_home')


In [16]:
dtrain = df.copy()

In [17]:
dtrain = dtrain[['Offer_Accepted','Bank_Accnt_Open','Household_Size','Homes_Owned','Credit_Rating','Average_Balance','Q1_Balance','Q2_Balance','Q3_Balance','Q4_Balance']]
dtrain

Unnamed: 0,Offer_Accepted,Bank_Accnt_Open,Household_Size,Homes_Owned,Credit_Rating,Average_Balance,Q1_Balance,Q2_Balance,Q3_Balance,Q4_Balance
0,0,1,4,1,3,1160.75,1669.0,877.0,1095.0,1002.0
1,0,1,5,2,2,147.25,39.0,106.0,78.0,366.0
2,0,2,2,1,2,276.50,367.0,352.0,145.0,242.0
3,0,2,4,1,3,1219.00,1578.0,1760.0,1119.0,419.0
4,0,1,6,1,2,1211.00,2140.0,1357.0,982.0,365.0
...,...,...,...,...,...,...,...,...,...,...
17971,0,1,5,1,1,167.50,136.0,65.0,71.0,398.0
17972,0,1,3,1,1,850.50,984.0,940.0,943.0,535.0
17973,0,1,4,1,1,1087.25,918.0,767.0,1170.0,1494.0
17974,0,1,2,2,2,1022.25,626.0,983.0,865.0,1615.0


In [18]:
dtrain = pd.concat([dtrain,reward,mailer,income,overdraw,creditcard,homes],axis=1)
dtrain

Unnamed: 0,Offer_Accepted,Bank_Accnt_Open,Household_Size,Homes_Owned,Credit_Rating,Average_Balance,Q1_Balance,Q2_Balance,Q3_Balance,Q4_Balance,Reward_Air Miles,Reward_Cash Back,Reward_Points,mailer_Letter,mailer_Postcard,income_High,income_Low,income_Medium,overdraw_No,overdraw_Yes,CC_1,CC_2,CC_3,CC_4,hold_home_1,hold_home_2,hold_home_3
0,0,1,4,1,3,1160.75,1669.0,877.0,1095.0,1002.0,1,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0
1,0,1,5,2,2,147.25,39.0,106.0,78.0,366.0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,1,0
2,0,2,2,1,2,276.50,367.0,352.0,145.0,242.0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0
3,0,2,4,1,3,1219.00,1578.0,1760.0,1119.0,419.0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,0,0
4,0,1,6,1,2,1211.00,2140.0,1357.0,982.0,365.0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17971,0,1,5,1,1,167.50,136.0,65.0,71.0,398.0,0,1,0,1,0,1,0,0,1,0,1,0,0,0,1,0,0
17972,0,1,3,1,1,850.50,984.0,940.0,943.0,535.0,0,1,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0
17973,0,1,4,1,1,1087.25,918.0,767.0,1170.0,1494.0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0
17974,0,1,2,2,2,1022.25,626.0,983.0,865.0,1615.0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,1,0


# Split data train and test

In [9]:
X = dtrain.drop('Offer_Accepted',axis=1)
y = dtrain['Offer_Accepted']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [10]:
X_train

Unnamed: 0,Bank_Accnt_Open,Household_Size,Credit_Rating,Average_Balance,Q1_Balance,Q2_Balance,Q3_Balance,Q4_Balance,Reward_Air Miles,Reward_Cash Back,Reward_Points,mailer_Letter,mailer_Postcard,income_High,income_Low,income_Medium,overdraw_No,overdraw_Yes,CC_1,CC_2,CC_3,CC_4,hold_home_1,hold_home_2,hold_home_3,owningHome_No,owningHome_Yes
16511,1,3,2,1104.25,2056.0,1013.0,801.0,547.0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,1,0,0,1
7683,1,3,1,1277.00,545.0,1398.0,1233.0,1932.0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,0,1
9189,1,4,1,277.50,476.0,34.0,200.0,400.0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,0,1
13376,1,3,1,254.50,160.0,463.0,95.0,300.0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,0,1
647,2,3,3,1133.75,1870.0,1271.0,678.0,716.0,1,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11284,1,3,3,1131.25,345.0,790.0,1434.0,1956.0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0
11964,2,4,1,986.75,1288.0,1234.0,1137.0,288.0,0,1,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1
5390,2,6,2,601.50,581.0,714.0,550.0,561.0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0
860,1,4,1,161.00,43.0,33.0,234.0,334.0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,1


In [11]:
y_train.value_counts()

0    11876
1      707
Name: Offer_Accepted, dtype: int64

# MODEL TRAINING

In [None]:
# split original data
X = dtrain.drop('Offer_Accepted',axis=1)
y = dtrain['Offer_Accepted']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

## Logistic Regression

## SMOTE

## OVERSAMPLING

## PCA

## Randomized Search

## DecisionTreeClassifier / Classification Trees

## Naive Bayes Classifier