In [147]:
import numpy as np 
import pandas as pd 
import seaborn as sns
sns.set(color_codes=True)
import matplotlib.pyplot as plt
%matplotlib inline
import collections
import math

In [148]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler 
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score as acs
from sklearn.svm import SVC



In [172]:
from prettytable import PrettyTable
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.callbacks import EarlyStopping
from keras.layers.core import Dense,Activation,Dropout
import warnings
warnings.filterwarnings("ignore")

# Pre-Processing of the data

## Dataset Used (Breast Cancer Diagnostic)

In [157]:
data=pd.read_csv("data.csv")
data.drop(["Unnamed: 32","id"],axis=1,inplace=True)
display(data.head(20))

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678
5,M,12.45,15.7,82.57,477.1,0.1278,0.17,0.1578,0.08089,0.2087,...,15.47,23.75,103.4,741.6,0.1791,0.5249,0.5355,0.1741,0.3985,0.1244
6,M,18.25,19.98,119.6,1040.0,0.09463,0.109,0.1127,0.074,0.1794,...,22.88,27.66,153.2,1606.0,0.1442,0.2576,0.3784,0.1932,0.3063,0.08368
7,M,13.71,20.83,90.2,577.9,0.1189,0.1645,0.09366,0.05985,0.2196,...,17.06,28.14,110.6,897.0,0.1654,0.3682,0.2678,0.1556,0.3196,0.1151
8,M,13.0,21.82,87.5,519.8,0.1273,0.1932,0.1859,0.09353,0.235,...,15.49,30.73,106.2,739.3,0.1703,0.5401,0.539,0.206,0.4378,0.1072
9,M,12.46,24.04,83.97,475.9,0.1186,0.2396,0.2273,0.08543,0.203,...,15.09,40.68,97.65,711.4,0.1853,1.058,1.105,0.221,0.4366,0.2075


In [158]:
data.shape

(569, 31)

In [216]:
X=data.iloc[:,1:32]
y=data.iloc[:,0]

In [217]:
scaler=StandardScaler()
X=scaler.fit_transform(X)

# Function To Calcuate Metrices 

# ML Models

In [218]:
def data_met(tn, fp, fn, tp):
    result = []
    result.append(tn)
    result.append(fp)
    result.append(fn)
    result.append(tp)
    tpr = tp/(tp+fn)
    result.append(tpr)
    tnr = tn/(tn+fp)
    result.append(tnr)
    fpr = fp/(tn+fp)
    result.append(fpr)
    fnr = fn/(tp+fn)
    result.append(fnr)
    recall = tp/(tp+fn)
    result.append(recall)
    precision = tp/(tp+fp)
    result.append(precision)
    f1 = (2*tp)/(2*tp+fp+fn)
    result.append(f1)
    acc = (tp+tn)/(tp+fp+fn+tn)
    result.append(acc)
    err = (fp+fn)/(tp+fp+fn+tn)
    result.append(err)
    bacc = (tpr+tnr)/2
    result.append(bacc)
    tss = tp/(tp+fn) - fp/(fp+tn)
    result.append(tss)
    hss = 2*(tp*tn - fp*fn)/((tp+fn)*(fn+tn) + (tp+fp)*(fp+tn))
    result.append(hss)
    return np.array(result)

In [233]:
cross_validation_folds = KFold(n_splits=10,shuffle=True, random_state=3030)
fold = 0
for train_index, test_index in cross_validation_folds.split(X, y):
    fold += 1
    print("Fold", str(fold))
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    #Random Forrest
    print("\tRandom Forest model result:")
    rf = RandomForestClassifier(max_depth=5, random_state=0)
    rf.fit(X_train, y_train)
    y_pred_rf = rf.predict(X_test)
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred_rf).ravel()
    rf_result = data_met(tn, fp, fn, tp)
    print("\t\tTrue negative:", rf_result[0])
    print("\t\tFalse positive:", rf_result[1])
    print("\t\tFalse negative:", rf_result[2])
    print("\t\tTrue positive:", rf_result[3])
    print("\t\tTrue positive rate:", rf_result[4])
    print("\t\tTrue negative rate:", rf_result[5])
    print("\t\tFalse positive rate:", rf_result[6])
    print("\t\tFalse negative rate:", rf_result[7])
    print("\t\tRecall:", rf_result[8])
    print("\t\tPrecision:", rf_result[9])
    print("\t\tF1:", rf_result[10])
    print("\t\tAccuracy:", rf_result[11])
    print("\t\tError Rate:", rf_result[12]) 
    print("\t\tBalance Accuracy:", rf_result[13])
    print("\t\tTrue skill statistics:", rf_result[14])
    print("\t\tHeidke skill score:", rf_result[15])
     #SVM
    print("\tSVM model result:")
    svc = SVC(gamma='auto')
    svc.fit(X_train, y_train)
    y_pred_svc = svc.predict(X_test)
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred_svc).ravel()
    svc_result = data_met(tn, fp, fn, tp)
    print("\t\tTrue negative:", svc_result[0])
    print("\t\tFalse positive:", svc_result[1])
    print("\t\tFalse negative:", svc_result[2])
    print("\t\tTrue positive:", svc_result[3])
    print("\t\tTrue positive rate:", svc_result[4])
    print("\t\tTrue negative rate:", svc_result[5])
    print("\t\tFalse positive rate:", svc_result[6])
    print("\t\tFalse negative rate:", svc_result[7])
    print("\t\tRecall:", svc_result[8])
    print("\t\tPrecision:", svc_result[9])
    print("\t\tF1:", svc_result[10])
    print("\t\tAccuracy:", svc_result[11])
    print("\t\tError Rate:", svc_result[12]) 
    print("\t\tBalance Accuracy:", svc_result[13])
    print("\t\tTrue skill statistics:", svc_result[14])
    print("\t\tHeidke skill score:", svc_result[15])
    print("\n\n")
    ##lstm
    #print (X[:5])
    #print (y[:5],set(y))
    #print (y[:5],set(y))
    
    
    X_bar = X.copy()
    y_bar = np.array([1 if k == 'B' else 0 for k in list(y)])
    
    X_train_fold = np.array(X_bar[train_index])
    y_train_fold = np.array(y_bar[train_index]).reshape(-1,1).astype(int)
    X_val_fold = np.array(X_bar[test_index])
    y_val_fold = np.array(y_bar[test_index]).reshape(-1,1).astype(int)
    
    X_train_fold = np.reshape(X_train_fold,(X_train_fold.shape[0], 1, X_train_fold.shape[1])).astype(int)
    X_val_fold = np.reshape(X_val_fold, (X_val_fold.shape[0], 1, X_val_fold.shape[1])).astype(int)
    
    model = Sequential()

    model.add(LSTM(60, return_sequences=True, input_shape=(1,30)))
    model.add(Dropout(0.2))
    model.add(Dense(1))
    model.add(Activation('Softmax'))
    model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])
    history = model.fit(X_train_fold, y_train_fold, epochs=5, batch_size=20, validation_data=(X_val_fold, y_val_fold),
                        verbose=1, shuffle=False)
    
    ## Report LSTM Result
    y_pred_lstm = model.predict(X_val_fold)
    y_pred_lstm = np.array([t[0][0] for t in y_pred_lstm]).ravel()
    y_val_fold = np.array(y_bar[test_index]).ravel()
    tn, fp, fn, tp = confusion_matrix(y_val_fold, y_pred_lstm).ravel()
    lstm_result = data_met(tn, fp, fn, tp)
    print("LSTM:\n\n")
    print("\t\tTrue negative:", lstm_result[0])
    print("\t\tFalse positive:", lstm_result[1])
    print("\t\tFalse negative:", lstm_result[2])
    print("\t\tTrue positive:", lstm_result[3])
    print("\t\tTrue positive rate:", lstm_result[4])
    print("\t\tTrue negative rate:", lstm_result[5])
    print("\t\tFalse positive rate:", lstm_result[6])
    print("\t\tFalse negative rate:", lstm_result[7])
    print("\t\tRecall:", lstm_result[8])
    print("\t\tPrecision:", lstm_result[9])
    print("\t\tF1:", lstm_result[10])
    print("\t\tAccuracy:", lstm_result[11])
    print("\t\tError Rate:", lstm_result[12]) 
    print("\t\tBalance Accuracy:", lstm_result[13])
    print("\t\tTrue skill statistics:", lstm_result[14])
    print("\t\tHeidke skill score:", lstm_result[15])
    
    
    table=PrettyTable()
    table.field_names = ['Model','TN','FP','FN','TP','TPR','TNR','FPR','FNR','recall','precision','F1','accuracy','ER','BA','TSS','HSS']
    table.add_row(['RandomForest',rf_result[0],rf_result[1],rf_result[2],rf_result[3],rf_result[4],rf_result[5],rf_result[6],rf_result[7],rf_result[8],rf_result[9],rf_result[10],rf_result[11],rf_result[12],rf_result[13],rf_result[14],rf_result[15]])
    table.add_row(['SVM',svc_result[0],svc_result[1],svc_result[2],svc_result[3],svc_result[4],svc_result[5],svc_result[6],svc_result[7],svc_result[8],svc_result[9],svc_result[10],svc_result[11],svc_result[12],svc_result[13],svc_result[14],svc_result[15]])
    table.add_row(['lstm',lstm_result[0],lstm_result[1],lstm_result[2],lstm_result[3],lstm_result[4],lstm_result[5],lstm_result[6],lstm_result[7],lstm_result[8],lstm_result[9],svc_result[10],lstm_result[11],lstm_result[12],lstm_result[13],lstm_result[14],lstm_result[15]])
    print(table)
    print("\n\n")

Fold 1
	Random Forest model result:
		True negative: 33.0
		False positive: 0.0
		False negative: 3.0
		True positive: 21.0
		True positive rate: 0.875
		True negative rate: 1.0
		False positive rate: 0.0
		False negative rate: 0.125
		Recall: 0.875
		Precision: 1.0
		F1: 0.9333333333333333
		Accuracy: 0.9473684210526315
		Error Rate: 0.05263157894736842
		Balance Accuracy: 0.9375
		True skill statistics: 0.875
		Heidke skill score: 0.8901734104046243
	SVM model result:
		True negative: 32.0
		False positive: 1.0
		False negative: 0.0
		True positive: 24.0
		True positive rate: 1.0
		True negative rate: 0.9696969696969697
		False positive rate: 0.030303030303030304
		False negative rate: 0.0
		Recall: 1.0
		Precision: 0.96
		F1: 0.9795918367346939
		Accuracy: 0.9824561403508771
		Error Rate: 0.017543859649122806
		Balance Accuracy: 0.9848484848484849
		True skill statistics: 0.9696969696969697
		Heidke skill score: 0.9642184557438794



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
LSTM:


		True negative: 0.0
		False positive: 21.0
		False negative: 0.0
		True positive: 36.0
		True positive rate: 1.0
		True negative rate: 0.0
		False positive rate: 1.0
		False negative rate: 0.0
		Recall: 1.0
		Precision: 0.631578947368421
		F1: 0.7741935483870968
		Accuracy: 0.631578947368421
		Error Rate: 0.3684210526315789
		Balance Accuracy: 0.5
		True skill statistics: 0.0
		Heidke skill score: 0.0
+--------------+------+------+-----+------+--------------------+--------------------+----------------------+----------------------+--------------------+--------------------+--------------------+--------------------+----------------------+--------------------+--------------------+--------------------+
|    Model     |  TN  |  FP  |  FN |  TP  |        TPR         |        TNR         |         FPR          |         FNR          |       recall       |     precision      |         F1         |      accuracy      |          ER       

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
LSTM:


		True negative: 0.0
		False positive: 23.0
		False negative: 0.0
		True positive: 34.0
		True positive rate: 1.0
		True negative rate: 0.0
		False positive rate: 1.0
		False negative rate: 0.0
		Recall: 1.0
		Precision: 0.5964912280701754
		F1: 0.7472527472527473
		Accuracy: 0.5964912280701754
		Error Rate: 0.40350877192982454
		Balance Accuracy: 0.5
		True skill statistics: 0.0
		Heidke skill score: 0.0
+--------------+------+------+-----+------+--------------------+-----+-----+---------------------+--------------------+--------------------+--------------------+--------------------+---------------------+--------------------+--------------------+--------------------+
|    Model     |  TN  |  FP  |  FN |  TP  |        TPR         | TNR | FPR |         FNR         |       recall       |     precision      |         F1         |      accuracy      |          ER         |         BA         |        TSS         |        HSS        

Epoch 4/5
Epoch 5/5
LSTM:


		True negative: 0.0
		False positive: 17.0
		False negative: 0.0
		True positive: 40.0
		True positive rate: 1.0
		True negative rate: 0.0
		False positive rate: 1.0
		False negative rate: 0.0
		Recall: 1.0
		Precision: 0.7017543859649122
		F1: 0.8247422680412371
		Accuracy: 0.7017543859649122
		Error Rate: 0.2982456140350877
		Balance Accuracy: 0.5
		True skill statistics: 0.0
		Heidke skill score: 0.0
+--------------+------+------+-----+------+--------------------+-------+-------+----------------------+--------------------+--------------------+--------------------+--------------------+----------------------+--------------------+--------------------+--------------------+
|    Model     |  TN  |  FP  |  FN |  TP  |        TPR         |  TNR  |  FPR  |         FNR          |       recall       |     precision      |         F1         |      accuracy      |          ER          |         BA         |        TSS         |        HSS         |
+--------------+

LSTM:


		True negative: 0.0
		False positive: 24.0
		False negative: 0.0
		True positive: 33.0
		True positive rate: 1.0
		True negative rate: 0.0
		False positive rate: 1.0
		False negative rate: 0.0
		Recall: 1.0
		Precision: 0.5789473684210527
		F1: 0.7333333333333333
		Accuracy: 0.5789473684210527
		Error Rate: 0.42105263157894735
		Balance Accuracy: 0.5
		True skill statistics: 0.0
		Heidke skill score: 0.0
+--------------+------+------+-----+------+--------------------+-----+-----+---------------------+--------------------+--------------------+--------------------+--------------------+---------------------+--------------------+--------------------+--------------------+
|    Model     |  TN  |  FP  |  FN |  TP  |        TPR         | TNR | FPR |         FNR         |       recall       |     precision      |         F1         |      accuracy      |          ER         |         BA         |        TSS         |        HSS         |
+--------------+------+------+-----+------+----