In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

# Classifier Libraries
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
import collections

# Other Libraries
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
# from imblearn.pipeline import make_pipeline as imbalanced_make_pipeline
# from imblearn.over_sampling import SMOTE
# from imblearn.under_sampling import NearMiss
# from imblearn.metrics import classification_report_imbalanced
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, accuracy_score, classification_report
from collections import Counter
from sklearn.model_selection import KFold, StratifiedKFold
# import tensorflow as tf
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA, TruncatedSVD
import matplotlib.patches as mpatches
import time

from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score

import warnings
import random
warnings.filterwarnings("ignore")

random.seed(42)

In [2]:
data = pd.read_csv(".\data\creditcard.csv")

In [3]:
df = data.copy()

In [5]:
#check for any null values
data.isnull().sum().max()

In [6]:
# The classes are heavily skewed we need to solve this issue later.
print('No Frauds', round(data['Class'].value_counts()[0]/len(data) * 100,2), '% of the dataset')
print('Frauds', round(data['Class'].value_counts()[1]/len(data) * 100,2), '% of the dataset')

In [7]:
colors = ["#0101DF", "#DF0101"]

sns.countplot('Class', data=data, palette=colors)
plt.title('Class Distributions \n (0: No Fraud || 1: Fraud)', fontsize=14)

In [8]:
f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(12,4))

bins = 50

ax1.hist(df.Time[df.Class == 1], bins = bins)
ax1.set_title('Fraud')

ax2.hist(df.Time[df.Class == 0], bins = bins)
ax2.set_title('Normal')

plt.xlabel('Time (in Seconds)')
plt.ylabel('Number of Transactions')
plt.show()

In [10]:
# Cyclical encoding of the seconds attribute
seconds_in_day = 24*60*60

df['sin_time'] = np.sin(2*np.pi*df.Time/seconds_in_day)
df['cos_time'] = np.cos(2*np.pi*df.Time/seconds_in_day)

df.drop('Time', axis=1, inplace=True)

In [11]:
# Separate input features and target
y = df.Class
X = df.drop('Class', axis=1)

# setting up testing and training sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=27)

## First we model the data as-is.

In [14]:
# Modeling the data as is using LogisticRegression

lr = LogisticRegression(solver='liblinear').fit(X_train, y_train)
 
# Predict on testing set
lr_pred = lr.predict(X_test)

# Performance metrics
f1 = f1_score(y_test, lr_pred)
print("f1 score :", f1)
precision = precision_score(y_test, lr_pred)
print("precision :",precision)
recall = recall_score(y_test, lr_pred)
print("recall :",recall)
accuracy = accuracy_score(y_test, lr_pred)
print("Accuracy :",accuracy)
average_precision = average_precision_score(y_test, lr_pred)
print("Average Precision-Recall Score :",average_precision)

# f1 score : 0.7443946188340808
# precision : 0.9120879120879121
# recall : 0.6287878787878788
# Average Precision-Recall Score : 0.5741980064260337

In [16]:
# Modeling the data as is using RandomForestClassifier

# train model
rfc = RandomForestClassifier(n_estimators=1000).fit(X_train, y_train)

# predict on test set
rfc_pred = rfc.predict(X_test)

# Performance metrics
f1 = f1_score(y_test, rfc_pred)
print("f1 score :", f1)
precision = precision_score(y_test, rfc_pred)
print("precision :",precision)
recall = recall_score(y_test, rfc_pred)
print("recall :",recall)
accuracy = accuracy_score(y_test, rfc_pred)
print("Accuracy :",accuracy)
average_precision = average_precision_score(y_test, rfc_pred)
print("Average Precision-Recall Score :",average_precision)

# f1 score : 0.8702928870292888
# precision : 0.9719626168224299
# recall : 0.7878787878787878
# Average Precision-Recall Score : 0.7661819757862424

In [18]:
# Modeling the data as is using KNeighborsClassifier

neigh = KNeighborsClassifier(n_neighbors=2)
neigh.fit(X_train, y_train)

knc_pred = neigh.predict(X_test)

# Performance metrics
f1 = f1_score(y_test, knc_pred)
print("f1 score :", f1)
precision = precision_score(y_test, knc_pred)
print("precision :",precision)
recall = recall_score(y_test, knc_pred)
print("recall :",recall)
accuracy = accuracy_score(y_test, knc_pred)
print("Accuracy :",accuracy)
average_precision = average_precision_score(y_test, knc_pred)
print("Average Precision-Recall Score :",average_precision)

In [19]:
# Modeling the data as is using SVC

from sklearn import svm

clf = svm.SVC()
clf.fit(X_train, y_train)
SVM_pred = clf.predict(X_test)

# Performance metrics
f1 = f1_score(y_test, SVM_pred)
print("f1 score :", f1)
precision = precision_score(y_test, SVM_pred)
print("precision :",precision)
recall = recall_score(y_test, SVM_pred)
print("recall :",recall)
accuracy = accuracy_score(y_test, SVM_pred)
print("Accuracy :",accuracy)
average_precision = average_precision_score(y_test, SVM_pred)
print("Average Precision-Recall Score :",average_precision)

In [20]:
# Modeling the data as is using MLPClassifier

mlp = MLPClassifier(activation = "logistic")
mlp.fit(X_train, y_train)

mlp_pred = mlp.predict(X_test)

# Performance metrics
f1 = f1_score(y_test, mlp_pred)
print("f1 score :", f1)
precision = precision_score(y_test, mlp_pred)
print("precision :",precision)
recall = recall_score(y_test, mlp_pred)
print("recall :",recall)
accuracy = accuracy_score(y_test, mlp_pred)
print("Accuracy :",accuracy)
average_precision = average_precision_score(y_test, mlp_pred)
print("Average Precision-Recall Score :",average_precision)

## Lets try these same classifiers with over sampling the under represented(fraud) data

In [21]:
from sklearn.utils import resample

# Separate input features and target
y = df.Class
X = df.drop('Class', axis=1)

# setting up testing and training sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=27)

# concatenate our training data back together
X = pd.concat([X_train, y_train], axis=1)

# separate minority and majority classes
not_fraud = X[X.Class==0]
fraud = X[X.Class==1]

# upsample minority
fraud_upsampled = resample(fraud,
                          replace=True, # sample with replacement
                          n_samples=len(not_fraud), # match number in majority class
                          random_state=27) # reproducible results

# combine majority and upsampled minority
upsampled = pd.concat([not_fraud, fraud_upsampled])

upsampled.Class.value_counts()

y_train = upsampled.Class
X_train = upsampled.drop('Class', axis=1)

In [22]:
# Modeling the upsampled data using LogisticRegression

upsampled = LogisticRegression(solver='liblinear').fit(X_train, y_train)
upsampled_pred = upsampled.predict(X_test)

# Performance metrics
f1 = f1_score(y_test, upsampled_pred)
print("f1 score :", f1)
precision = precision_score(y_test, upsampled_pred)
print("precision :",precision)
recall = recall_score(y_test, upsampled_pred)
print("recall :",recall)
accuracy = accuracy_score(y_test, upsampled_pred)
print("Accuracy :",accuracy)
average_precision = average_precision_score(y_test, upsampled_pred)
print("Average Precision-Recall Score :",average_precision)
# f1 score : 0.12520237452779276
# precision : 0.0674026728646136
# recall : 0.8787878787878788
# Average Precision-Recall Score : 0.059457364700293704

In [23]:
# Modeling the upsampled data using RandomForestClassifier

upsampled = RandomForestClassifier(n_estimators=1000).fit(X_train, y_train)
upsampled_pred = upsampled.predict(X_test)

# Performance metrics
f1 = f1_score(y_test, upsampled_pred)
print("f1 score :", f1)
precision = precision_score(y_test, upsampled_pred)
print("precision :",precision)
recall = recall_score(y_test, upsampled_pred)
print("recall :",recall)
accuracy = accuracy_score(y_test, upsampled_pred)
print("Accuracy :",accuracy)
average_precision = average_precision_score(y_test, upsampled_pred)
print("Average Precision-Recall Score :",average_precision)

# f1 score : 0.8713692946058091
# precision : 0.963302752293578
# recall : 0.7954545454545454
# Average Precision-Recall Score : 0.7666427557921818

In [25]:
# Modeling the upsampled data as is using KNeighborsClassifier

upsampled = KNeighborsClassifier(n_neighbors=2).fit(X_train, y_train)
upsampled_pred = upsampled.predict(X_test)

# Performance metrics
f1 = f1_score(y_test, upsampled_pred)
print("f1 score :", f1)
precision = precision_score(y_test, upsampled_pred)
print("precision :",precision)
recall = recall_score(y_test, upsampled_pred)
print("recall :",recall)
accuracy = accuracy_score(y_test, upsampled_pred)
print("Accuracy :",accuracy)
average_precision = average_precision_score(y_test, upsampled_pred)

In [26]:
# Modeling the upsampled data as is using SVC

from sklearn import svm

upsampled = svm.SVC().fit(X_train, y_train)
upsampled_pred = upsampled.predict(X_test)

# Performance metrics
f1 = f1_score(y_test, upsampled_pred)
print("f1 score :", f1)
precision = precision_score(y_test, upsampled_pred)
print("precision :",precision)
recall = recall_score(y_test, upsampled_pred)
print("recall :",recall)
accuracy = accuracy_score(y_test, upsampled_pred)
print("Accuracy :",accuracy)
average_precision = average_precision_score(y_test, upsampled_pred)
print("Average Precision-Recall Score :",average_precision)

In [27]:
# Modeling the upsampled data as is using MLPClassifier

upsampled = MLPClassifier(activation = "logistic").fit(X_train, y_train)
upsampled_pred = upsampled.predict(X_test)

# Performance metrics
f1 = f1_score(y_test, upsampled_pred)
print("f1 score :", f1)
precision = precision_score(y_test, upsampled_pred)
print("precision :",precision)
recall = recall_score(y_test, upsampled_pred)
print("recall :",recall)
accuracy = accuracy_score(y_test, upsampled_pred)
print("Accuracy :",accuracy)
average_precision = average_precision_score(y_test, upsampled_pred)
print("Average Precision-Recall Score :",average_precision)

## Lets try under sampling the over-represented negative(non-fraud) class

In [28]:
# still using our separated classes fraud and not_fraud from above

# downsample majority
not_fraud_downsampled = resample(not_fraud,
                                replace = False, # sample without replacement
                                n_samples = len(fraud), # match minority n
                                random_state = 27) # reproducible results

# combine minority and downsampled majority
downsampled = pd.concat([not_fraud_downsampled, fraud])

# checking counts
downsampled.Class.value_counts()

y_train = downsampled.Class
X_train = downsampled.drop('Class', axis=1)

In [29]:
# Modeling the undersampled data using LogisticRegression

undersampled = LogisticRegression(solver='liblinear').fit(X_train, y_train)
undersampled_pred = undersampled.predict(X_test)

# Performance metrics
f1 = f1_score(y_test, undersampled_pred)
print("f1 score :", f1)
precision = precision_score(y_test, undersampled_pred)
print("precision :",precision)
recall = recall_score(y_test, undersampled_pred)
print("recall :",recall)
accuracy = accuracy_score(y_test, undersampled_pred)
print("Accuracy :",accuracy)
average_precision = average_precision_score(y_test, undersampled_pred)
print("Average Precision-Recall Score :",average_precision)

# f1 score : 0.0819614711033275
# precision : 0.04296731546088873
# recall : 0.8863636363636364
# Average Precision-Recall Score : 0.03829533421635301

In [30]:
# Modeling the undersampled data using RandomForestClassifier

undersampled = RandomForestClassifier(n_estimators=1000).fit(X_train, y_train)
undersampled_pred = undersampled.predict(X_test)

# Performance metrics
f1 = f1_score(y_test, undersampled_pred)
print("f1 score :", f1)
precision = precision_score(y_test, undersampled_pred)
print("precision :",precision)
recall = recall_score(y_test, undersampled_pred)
print("recall :",recall)
accuracy = accuracy_score(y_test, undersampled_pred)
print("Accuracy :",accuracy)
average_precision = average_precision_score(y_test, undersampled_pred)
print("Average Precision-Recall Score :",average_precision)

# f1 score : 0.14348097317529632
# precision : 0.07817811012916383
# recall : 0.8712121212121212
# Average Precision-Recall Score : 0.06834847449626065

In [31]:
# Modeling the upsampled data as is using KNeighborsClassifier

undersampled = KNeighborsClassifier(n_neighbors=2).fit(X_train, y_train)
undersampled_pred = undersampled.predict(X_test)

# Performance metrics
f1 = f1_score(y_test, undersampled_pred)
print("f1 score :", f1)
precision = precision_score(y_test, undersampled_pred)
print("precision :",precision)
recall = recall_score(y_test, undersampled_pred)
print("recall :",recall)
accuracy = accuracy_score(y_test, undersampled_pred)
print("Accuracy :",accuracy)
average_precision = average_precision_score(y_test, undersampled_pred)
print("Average Precision-Recall Score :",average_precision)

In [32]:
# Modeling the upsampled data as is using SVC

undersampled = svm.SVC().fit(X_train, y_train)
undersampled_pred = undersampled.predict(X_test)

# Performance metrics
f1 = f1_score(y_test, undersampled_pred)
print("f1 score :", f1)
precision = precision_score(y_test, undersampled_pred)
print("precision :",precision)
recall = recall_score(y_test, undersampled_pred)
print("recall :",recall)
accuracy = accuracy_score(y_test, undersampled_pred)
print("Accuracy :",accuracy)
average_precision = average_precision_score(y_test, undersampled_pred)
print("Average Precision-Recall Score :",average_precision)

In [33]:
# Modeling the upsampled data as is using MLPClassifier

undersampled = MLPClassifier(activation = "logistic").fit(X_train, y_train)
undersampled_pred = undersampled.predict(X_test)

# Performance metrics
f1 = f1_score(y_test, undersampled_pred)
print("f1 score :", f1)
precision = precision_score(y_test, undersampled_pred)
print("precision :",precision)
recall = recall_score(y_test, undersampled_pred)
print("recall :",recall)
accuracy = accuracy_score(y_test, undersampled_pred)
print("Accuracy :",accuracy)
average_precision = average_precision_score(y_test, undersampled_pred)
print("Average Precision-Recall Score :",average_precision)

## Lets try over-sampling the under-represented class using SMOTE(Synthetic Minority Over-sampling Technique) algorithm.


In [34]:
from imblearn.over_sampling import SMOTE

# Separate input features and target
y = df.Class
X = df.drop('Class', axis=1)

# setting up testing and training sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=27)

sm = SMOTE(random_state=27)#, ratio=1.0)
X_train, y_train = sm.fit_sample(X_train, y_train)

Using TensorFlow backend.


In [35]:
# Over-sampling the under-represented class using SMOTE algorithm and LogisticRegression
smote = LogisticRegression(solver='liblinear').fit(X_train, y_train)
smote_pred = smote.predict(X_test)

# Performance metrics
f1 = f1_score(y_test, smote_pred)
print("f1 score :", f1)
precision = precision_score(y_test, smote_pred)
print("precision :",precision)
recall = recall_score(y_test, smote_pred)
print("recall :",recall)
accuracy = accuracy_score(y_test, smote_pred)
print("Accuracy :",accuracy)
average_precision = average_precision_score(y_test, smote_pred)
print("Average Precision-Recall Score :",average_precision)    

# f1 score : 0.1235356762513312
# precision : 0.06643757159221077
# recall : 0.8787878787878788
# Average Precision-Recall Score : 0.058609245400303336

In [36]:
# Over-sampling the under-represented class using SMOTE algorithm and RandomForestClassifier
smote = RandomForestClassifier(n_estimators=1000).fit(X_train, y_train)
smote_pred = smote.predict(X_test)

# Performance metrics
f1 = f1_score(y_test, smote_pred)
print("f1 score :", f1)
precision = precision_score(y_test, smote_pred)
print("precision :",precision)
recall = recall_score(y_test, smote_pred)
print("recall :",recall)
accuracy = accuracy_score(y_test, smote_pred)
print("Accuracy :",accuracy)
average_precision = average_precision_score(y_test, smote_pred)
print("Average Precision-Recall Score :",average_precision)  

# f1 score : 0.865079365079365
# precision : 0.9083333333333333
# recall : 0.8257575757575758
# Average Precision-Recall Score : 0.7503861559472708

In [37]:
# Modeling the upsampled data as is using KNeighborsClassifier

smote = KNeighborsClassifier(n_neighbors=2).fit(X_train, y_train)
smote_pred = smote.predict(X_test)

# Performance metrics
f1 = f1_score(y_test, smote_pred)
print("f1 score :", f1)
precision = precision_score(y_test, smote_pred)
print("precision :",precision)
recall = recall_score(y_test, smote_pred)
print("recall :",recall)
accuracy = accuracy_score(y_test, smote_pred)
print("Accuracy :",accuracy)
average_precision = average_precision_score(y_test, smote_pred)
print("Average Precision-Recall Score :",average_precision) 

In [38]:
# Modeling the upsampled data as is using SVC

smote = svm.SVC().fit(X_train, y_train)
smote_pred = smote.predict(X_test)

# Performance metrics
f1 = f1_score(y_test, smote_pred)
print("f1 score :", f1)
precision = precision_score(y_test, smote_pred)
print("precision :",precision)
recall = recall_score(y_test, smote_pred)
print("recall :",recall)
accuracy = accuracy_score(y_test, smote_pred)
print("Accuracy :",accuracy)
average_precision = average_precision_score(y_test, smote_pred)
print("Average Precision-Recall Score :",average_precision) 

f1 score : 0.24137931034482757
precision : 0.1407035175879397
recall : 0.8484848484848485
Accuracy : 0.9901126372854695
Average Precision-Recall Score : 0.11966569378809648


In [39]:
# Modeling the upsampled data as is using MLPClassifier

smote = MLPClassifier(activation = "logistic").fit(X_train, y_train)
smote_pred = smote.predict(X_test)

# Performance metrics
f1 = f1_score(y_test, smote_pred)
print("f1 score :", f1)
precision = precision_score(y_test, smote_pred)
print("precision :",precision)
recall = recall_score(y_test, smote_pred)
print("recall :",recall)
accuracy = accuracy_score(y_test, smote_pred)
print("Accuracy :",accuracy)
average_precision = average_precision_score(y_test, smote_pred)
print("Average Precision-Recall Score :",average_precision) 