In [None]:
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, accuracy_score, roc_auc_score
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, cross_val_score
import matplotlib.pyplot as plt
from scipy import stats
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
import os

In [None]:
# Load sgemm_product.csv and collapse the four timed runs into a single average column.
# The data directory can be overridden with the SGEMM_DATA_DIR environment variable;
# the default is the original local folder, so existing setups keep working.
DATA_DIR = os.environ.get(
    'SGEMM_DATA_DIR',
    'C:/Users/manit/OneDrive/Desktop/Masters/Spring20/Sabir/Assignment_2',
)
# The working directory is still switched, in case later cells rely on relative paths.
os.chdir(DATA_DIR)
df = pd.read_csv("sgemm_product.csv")
# Strip the " (ms)" unit suffix so the run columns are valid attribute names.
df = df.rename(columns={'Run1 (ms)': 'Run1', 'Run2 (ms)': 'Run2',
                        'Run3 (ms)': 'Run3', 'Run4 (ms)': 'Run4'})
# Column-wise arithmetic gives the same value as the per-row lambda without a Python-level loop.
df['AvgRun'] = (df['Run1'] + df['Run2'] + df['Run3'] + df['Run4']) / 4
df = df.drop(["Run1", "Run2", "Run3", "Run4"], axis=1)

In [None]:
# Binarise the target: class 1 when the average runtime is at or above the overall mean.
avg_run = df["AvgRun"].mean()
print(avg_run)
df["run_class"] = (df['AvgRun'] >= avg_run).astype(int)
y = df["run_class"].astype("category")
# The first 14 columns are used as the feature matrix.
x = df.iloc[:, :14]

In [None]:
# 70/30 train/test split with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.7, random_state=1)
# Fit the scaler on the training split only and reuse its mean/std for the test split.
# Scaling the test split with its own statistics would put the two sets on different
# scales and leak test-set information into preprocessing.
scaler = preprocessing.StandardScaler().fit(x_train)
std_Xtrain = scaler.transform(x_train)
std_Xtest = scaler.transform(x_test)

In [None]:
# Support Vector Machines - linear kernel: 5-fold CV accuracy for each penalty C.
C = [0.01, 0.5, 1, 10]
scores = []
# A seeded, shuffled KFold produces the same folds on every use, so one instance serves all C values.
cv = KFold(n_splits=5, shuffle=True, random_state=1)
for penalty in C:
    clf = SVC(kernel='linear', C=penalty, random_state=1)
    scr = cross_val_score(clf, std_Xtrain, y_train, cv=cv)
    mean_acc = round(scr.mean(), 5)
    print(mean_acc)
    scores.append(mean_acc)

In [None]:
# Plot CV accuracy against C for the linear kernel.
fig, ax = plt.subplots()
ax.plot(C, scores, color='red', lw=2)
ax.set_xticks([0.01, 1, 5, 10])
ax.set_xlabel("Penalty Parameter C")
ax.set_ylabel("Accuracy")
plt.show()

# Fit the linear SVM with C=0.5 and evaluate it on the held-out split.
svclassifier = SVC(C=0.5, kernel='linear', probability=True, random_state=1)
svclassifier.fit(std_Xtrain, y_train)
ypred = svclassifier.predict(std_Xtest)
print(confusion_matrix(y_test, ypred))

In [None]:
# Test-set report, accuracy and ROC curve for the linear SVM.
print(classification_report(y_test, ypred))
print(round(accuracy_score(y_test, ypred), 5))

# Probability of the positive class (column 1) drives the ROC curve.
prob = svclassifier.predict_proba(std_Xtest)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, prob, pos_label=1)

fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(fpr, tpr, linewidth=2)
ax.plot([0, 1], [0, 1], 'k--')  # chance diagonal
plt.rcParams['font.size'] = 12
ax.set_title('ROC curve')
ax.set_xlabel('False Positive Rate (1 - Specificity)')
ax.set_ylabel('True Positive Rate (Sensitivity)')
plt.show()

ROC_AUC = roc_auc_score(y_test, prob)
round(ROC_AUC, 5)

In [None]:
# Polynomial kernel: 5-fold CV accuracy for each polynomial degree (C fixed at 1).
degree = [2, 3, 4, 5]
scores1 = []
# The same seeded folds are reused for every degree.
cv1 = KFold(n_splits=5, shuffle=True, random_state=1)
for deg in degree:
    clf1 = SVC(kernel='poly', degree=deg, C=1, gamma='auto', random_state=1)
    scr1 = cross_val_score(clf1, std_Xtrain, y_train, cv=cv1)
    mean_acc1 = round(scr1.mean(), 5)
    print(mean_acc1)
    scores1.append(mean_acc1)

In [None]:
# Plot CV accuracy against polynomial degree.
plt.figure()

plt.plot(degree, scores1, color='red', lw=2)
plt.xticks([2, 3, 4, 5])
plt.xlabel("Degree of polynomial")
plt.ylabel("Accuracy")
plt.show()

# Fit the degree-3 polynomial SVM with the same gamma as the cross-validated models,
# so the evaluated model matches the configuration that was selected.
# random_state makes the probability calibration (probability=True) reproducible.
svclassifier1 = SVC(kernel='poly', degree=3, gamma='auto', probability=True, random_state=1)
svclassifier1.fit(std_Xtrain, y_train)
ypred1 = svclassifier1.predict(std_Xtest)
print(confusion_matrix(y_test, ypred1))

In [None]:
# Test-set report, accuracy and ROC curve for the polynomial-kernel SVM.
# The report uses ypred1 (this model's predictions), not ypred from the linear model.
print(classification_report(y_test, ypred1))
print(round(accuracy_score(y_test, ypred1), 5))

# Probability of the positive class (column 1) drives the ROC curve.
prob1 = svclassifier1.predict_proba(std_Xtest)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, prob1, pos_label=1)

plt.figure(figsize=(6, 4))
plt.plot(fpr, tpr, linewidth=2)
plt.plot([0, 1], [0, 1], 'k--')  # chance diagonal
plt.rcParams['font.size'] = 12
plt.title('ROC curve')
plt.xlabel('False Positive Rate (1 - Specificity)')
plt.ylabel('True Positive Rate (Sensitivity)')

plt.show()
ROC_AUC = roc_auc_score(y_test, prob1)
round(ROC_AUC, 5)

In [None]:
# Radial basis kernel: 5-fold CV accuracy for each penalty C.
C = [0.01, 0.5, 1, 10]
scores2 = []
# The same seeded folds are reused for every C.
cv2 = KFold(n_splits=5, shuffle=True, random_state=1)
for penalty in C:
    clf2 = SVC(C=penalty, kernel='rbf', random_state=1)
    scr2 = cross_val_score(clf2, std_Xtrain, y_train, cv=cv2)
    mean_acc2 = round(scr2.mean(), 5)
    print(mean_acc2)
    scores2.append(mean_acc2)

In [None]:
# Plot CV accuracy against C for the RBF kernel.
plt.figure()

plt.plot(C, scores2, color='red', lw=2)
plt.xticks([0.01, 1, 5, 10])
plt.xlabel("Penalty term - C")
plt.ylabel("Accuracy")
plt.show()

# Fit the RBF SVM with C=10 and evaluate it on the held-out split.
# random_state is fixed (as in the CV loop) so probability=True calibration is reproducible.
svclassifier2 = SVC(kernel='rbf', probability=True, C=10, random_state=1)
svclassifier2.fit(std_Xtrain, y_train)
ypred2 = svclassifier2.predict(std_Xtest)
print(confusion_matrix(y_test, ypred2))

In [None]:
# Test-set report, accuracy and ROC curve for the RBF-kernel SVM.
# The report uses ypred2 (this model's predictions), not ypred from the linear model.
print(classification_report(y_test, ypred2))
# Rounded to 5 decimals like the other models so the accuracies are comparable.
print(round(accuracy_score(y_test, ypred2), 5))

# Probability of the positive class (column 1) drives the ROC curve.
prob2 = svclassifier2.predict_proba(std_Xtest)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, prob2, pos_label=1)

plt.figure(figsize=(6, 4))
plt.plot(fpr, tpr, linewidth=2)
plt.plot([0, 1], [0, 1], 'k--')  # chance diagonal
plt.rcParams['font.size'] = 12
plt.title('ROC curve')
plt.xlabel('False Positive Rate (1 - Specificity)')
plt.ylabel('True Positive Rate (Sensitivity)')

plt.show()
ROC_AUC = roc_auc_score(y_test, prob2)
round(ROC_AUC, 5)

In [None]:
# Decision Tree Algorithm (entropy criterion).
# 1) Unrestricted tree: fit on the training split, score on the test split.
clf_entropy = DecisionTreeClassifier(criterion="entropy", random_state=10)
clf_entropy.fit(std_Xtrain, y_train)
y_pred = clf_entropy.predict(std_Xtest)
print(confusion_matrix(y_test, y_pred))
baseline_pct = accuracy_score(y_test, y_pred) * 100
print(round(baseline_pct, 5))

# 2) 10-fold CV accuracy of the unrestricted tree.
clf_cv = DecisionTreeClassifier(criterion="entropy", random_state=10)
cv = KFold(n_splits=10, shuffle=True, random_state=10)
scr = cross_val_score(clf_cv, std_Xtrain, y_train, cv=cv)
print(round(scr.mean(), 5))

# 3) 5-fold CV accuracy for each max_depth from 2 to 27 inclusive.
depth = list(range(2, 28))
dpth_scores = []
cv = KFold(n_splits=5, shuffle=True, random_state=10)
for max_d in depth:
    clf_cv = DecisionTreeClassifier(criterion="entropy", max_depth=max_d, random_state=10)
    scr = cross_val_score(clf_cv, std_Xtrain, y_train, cv=cv)
    depth_acc = round(scr.mean(), 5)
    print(depth_acc)
    dpth_scores.append(depth_acc)

In [None]:
# Training accuracy of the decision tree for each max_depth, plotted against the CV accuracy.
dpth_scores1 = []
for i in depth:
    clf_train = DecisionTreeClassifier(criterion="entropy", random_state=10, max_depth=i)
    clf_train.fit(std_Xtrain, y_train)
    y_pred1 = clf_train.predict(std_Xtrain)
    train_acc = round(accuracy_score(y_train, y_pred1), 5)  # computed once, then printed and stored
    print(train_acc)
    dpth_scores1.append(train_acc)

plt.figure()
# Each curve is plotted against depth explicitly. plt.plot(depth, a, b) would treat b as a
# separate y-only series and draw it against 0..len(b)-1, shifting it on the x-axis.
plt.plot(depth, dpth_scores)
plt.plot(depth, dpth_scores1)
plt.xticks([2, 5, 8, 10, 12, 15, 17, 20, 23, 25, 27])
plt.xlabel("Maximum tree depth")
plt.ylabel("Accuracy")
plt.legend(["Cross validation accuracy", "Training Accuracy"])
plt.show()

In [None]:
# Final decision tree, evaluated on the held-out split.
# NOTE(review): this cell was labelled "Depth=15" but fits max_depth=7, while the combined-ROC
# section later in the notebook fits max_depth=15 - confirm which depth is intended.
clf_entropy = DecisionTreeClassifier(criterion="entropy", random_state=10, max_depth=7)
clf_entropy.fit(std_Xtrain, y_train)
y_pred = clf_entropy.predict(std_Xtest)
print(confusion_matrix(y_test, y_pred))
print(round(accuracy_score(y_test, y_pred) * 100, 5))
# The report is printed explicitly; a bare expression in the middle of a cell shows nothing.
print(classification_report(y_test, y_pred))

# Probabilities come from the standardised test features, because the tree was trained on
# standardised inputs. Feeding raw x_test makes the learned split thresholds meaningless.
probs = clf_entropy.predict_proba(std_Xtest)[:, 1]
fpr, tpr, th = roc_curve(y_test, probs, pos_label=1)
plt.figure(figsize=(6, 4))
plt.plot(fpr, tpr, linewidth=2)
plt.plot([0, 1], [0, 1], 'k--')  # chance diagonal
plt.rcParams['font.size'] = 12
plt.title('ROC curve')
plt.xlabel('False Positive Rate (1 - Specificity)')
plt.ylabel('True Positive Rate (Sensitivity)')

plt.show()
ROC_AUC = roc_auc_score(y_test, probs)
round(ROC_AUC, 5)

In [None]:
# Gradient boosting Algorithm: 5-fold CV accuracy for each number of estimators.
n_estimate = [20, 50, 70, 100]
est_scores = []
# The loop iterates over n_estimate; the original referenced an undefined name (n_est).
for n in n_estimate:
    gb_clf = GradientBoostingClassifier(random_state=10, n_estimators=n)
    cv = KFold(n_splits=5, random_state=10, shuffle=True)
    scr_b = cross_val_score(gb_clf, std_Xtrain, y_train, cv=cv)
    est_scores.append(round(scr_b.mean(), 5))

In [None]:
# CV accuracy against the number of boosting estimators; axes labelled so the figure stands alone.
plt.figure()
plt.plot(n_estimate, est_scores)
plt.xticks([20, 50, 70, 100])
plt.xlabel("Number of estimators")
plt.ylabel("Cross validation accuracy")
plt.show()

In [None]:
# Gradient boosting: 5-fold CV accuracy for each learning rate (100 estimators).
alpha = [0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1]
alpha_scores = []
# The same seeded folds are reused for every learning rate.
cv = KFold(n_splits=5, shuffle=True, random_state=10)
for rate in alpha:
    gb_clf = GradientBoostingClassifier(n_estimators=100, learning_rate=rate, random_state=10)
    scr_b = cross_val_score(gb_clf, std_Xtrain, y_train, cv=cv)
    rate_acc = round(scr_b.mean(), 5)
    print(rate_acc)
    alpha_scores.append(rate_acc)

In [None]:
# CV accuracy against the learning rate; axes labelled so the figure stands alone.
plt.figure()
plt.plot(alpha, alpha_scores)
plt.xticks([0.1, 0.5, 0.75, 1])
plt.xlabel("Learning rate")
plt.ylabel("Cross validation accuracy")
plt.show()

In [None]:
# Gradient boosting: 5-fold CV accuracy for each max_depth from 2 to 10 inclusive.
depth = list(range(2, 11))
dpth_scores1 = []
# The same seeded folds are reused for every depth.
cv = KFold(n_splits=5, shuffle=True, random_state=10)
for max_d in depth:
    gb_clf = GradientBoostingClassifier(
        n_estimators=100, learning_rate=0.75, max_depth=max_d, random_state=10
    )
    scr = cross_val_score(gb_clf, std_Xtrain, y_train, cv=cv)
    depth_acc = round(scr.mean(), 5)
    print(depth_acc)
    dpth_scores1.append(depth_acc)

In [None]:
# Gradient boosting: training-set accuracy for each max_depth from 2 to 10 inclusive.
depth = list(range(2, 11))
dpth_scores = []
for max_d in depth:
    gb_clf = GradientBoostingClassifier(
        n_estimators=100, learning_rate=0.75, max_depth=max_d, random_state=10
    )
    gb_clf.fit(std_Xtrain, y_train)
    # Scoring is deterministic for a fitted model, so one call feeds both the print and the list.
    train_acc = round(gb_clf.score(std_Xtrain, y_train), 5)
    print(train_acc)
    dpth_scores.append(train_acc)

In [None]:
# Training vs CV accuracy of gradient boosting across max_depth; axes labelled so the figure stands alone.
plt.figure()
plt.plot(depth, dpth_scores)
plt.plot(depth, dpth_scores1)
plt.xticks([2, 4, 6, 8, 10])
plt.xlabel("Maximum tree depth")
plt.ylabel("Accuracy")
plt.legend(["Training Accuracy", "Cross validation accuracy"])
plt.show()

# Fit the gradient boosting model with max_depth=3 and evaluate it on the held-out split.
gb_clf = GradientBoostingClassifier(random_state=10, n_estimators=100, learning_rate=0.75, max_depth=3)
gb_clf.fit(std_Xtrain, y_train)
y_pred = gb_clf.predict(std_Xtest)
print(confusion_matrix(y_test, y_pred))
print(round(accuracy_score(y_test, y_pred)*100, 5))
print(classification_report(y_test, y_pred))

In [None]:
# ROC curve and AUC for the gradient boosting model on the held-out split.
probs = gb_clf.predict_proba(std_Xtest)[:, 1]  # positive-class probability
fpr, tpr, th = roc_curve(y_test, probs, pos_label=1)

fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(fpr, tpr, linewidth=2)
ax.plot([0, 1], [0, 1], 'k--')  # chance diagonal
plt.rcParams['font.size'] = 12
ax.set_title('ROC curve')
ax.set_xlabel('False Positive Rate (1 - Specificity)')
ax.set_ylabel('True Positive Rate (Sensitivity)')
plt.show()

ROC_AUC = roc_auc_score(y_test, probs)
round(ROC_AUC, 5)

In [None]:
# Plots for the Dataset 1: horizontal bar chart comparing the test accuracy of every model.
# NOTE(review): the accuracies below are hard-coded, presumably copied from earlier cell
# outputs - re-check them whenever the models above are re-run.
objects = ('SVM Linear', 'SVM Polynomial', 'SVM Radial', 'Decision Tree', 'Gradient Boosting')
y_pos = np.arange(len(objects))
performance = [0.93255, 0.96133, 0.98, 0.9358, 0.9632]
plt.barh(y_pos, performance, align='center', alpha=0.5)
plt.yticks(y_pos, objects)
plt.xlabel('Accuracy')
plt.ylabel('Algorithms')
plt.title('Comparison of all models')
# Annotate each bar with its value, right-aligned at the bar end.
for i, v in enumerate(performance):
    plt.text(v + 0.001, i, str(v), color='blue', ha='right', va='center')
plt.show()


In [None]:
# ROC curves for first dataset.
# Linear kernel: refit and keep the positive-class probabilities for the combined plot.
svclassifier = SVC(C=0.5, kernel='linear', random_state=1, probability=True).fit(std_Xtrain, y_train)
linear_proba = svclassifier.predict_proba(std_Xtest)
prob1 = linear_proba[:, 1]

In [None]:
# Polynomial kernel: refit and keep the positive-class probabilities for the combined plot.
# gamma='auto' matches the configuration used in the degree cross-validation loop;
# random_state makes the probability calibration (probability=True) reproducible.
svclassifier1 = SVC(kernel='poly', degree=3, gamma='auto', probability=True, random_state=1)
svclassifier1.fit(std_Xtrain, y_train)
prob2 = svclassifier1.predict_proba(std_Xtest)[:, 1]

In [None]:
# Radial Basis Kernel: refit and keep the positive-class probabilities for the combined plot.
# random_state makes the probability calibration (probability=True) reproducible.
svclassifier2 = SVC(kernel='rbf', probability=True, C=10, random_state=1)
svclassifier2.fit(std_Xtrain, y_train)
prob3 = svclassifier2.predict_proba(std_Xtest)[:, 1]

In [None]:
# Depth=15 decision tree: refit and keep the positive-class probabilities for the combined plot.
# It uses the same standardised train/test splits as the other first-dataset models, so prob4
# lines up with the y_test labels used for its ROC curve. The names std_Xtrain1, y_train1 and
# std_Xtest1 used before are not defined anywhere above this cell.
clf_entropy = DecisionTreeClassifier(criterion="entropy", random_state=10, max_depth=15)
clf_entropy.fit(std_Xtrain, y_train)
prob4 = clf_entropy.predict_proba(std_Xtest)[:, 1]

In [None]:
# Gradient boosting: refit and keep the positive-class probabilities for the combined plot.
gb_clf = GradientBoostingClassifier(random_state=10, n_estimators=100, learning_rate=0.75, max_depth=3)
gb_clf.fit(std_Xtrain, y_train)
prob5 = gb_clf.predict_proba(std_Xtest)[:, 1]

# One ROC curve per model, all scored against the same held-out labels.
fpr1, tpr1, th1 = roc_curve(y_test, prob1, pos_label=1)
fpr2, tpr2, th2 = roc_curve(y_test, prob2, pos_label=1)
fpr3, tpr3, th3 = roc_curve(y_test, prob3, pos_label=1)
fpr4, tpr4, th4 = roc_curve(y_test, prob4, pos_label=1)
fpr5, tpr5, th5 = roc_curve(y_test, prob5, pos_label=1)

plt.figure(figsize=(6, 4))

# The AUC in each legend entry is computed from the probabilities actually plotted, so the
# legend cannot go stale the way hand-typed numbers do.
roc_series = [
    ("SVM-Linear", fpr1, tpr1, prob1),
    ("SVM-Polynomial", fpr2, tpr2, prob2),
    ("SVM-Radial", fpr3, tpr3, prob3),
    ("DecisionTree", fpr4, tpr4, prob4),
    ("GradientBoosting", fpr5, tpr5, prob5),
]
for model_name, fpr_k, tpr_k, prob_k in roc_series:
    auc_k = round(roc_auc_score(y_test, prob_k), 5)
    plt.plot(fpr_k, tpr_k, linewidth=2, label="{}_AUC={}".format(model_name, auc_k))

# Chance diagonal; it carries no label, so it stays out of the legend.
plt.plot([0, 1], [0, 1], 'k--')
plt.rcParams['font.size'] = 12
plt.title('ROC curve comparison for all Algorithms')
plt.xlabel('False Positive Rate (1 - Specificity)')
plt.ylabel('True Positive Rate (Sensitivity)')
plt.legend()

plt.show()