# In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn import tree
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

## Configure the five classifiers (they are fitted further down) ##

# k-nearest neighbours: a single neighbour, distance weighting, KD-tree lookup.
neigh = KNeighborsClassifier(
    n_neighbors=1,
    weights='distance',
    algorithm='kd_tree',
)

# AdaBoost over a decision tree that only splits when impurity drops by >= 0.1.
# NOTE(review): `base_estimator` was renamed to `estimator` in scikit-learn 1.2
# and removed in 1.4 -- switch the keyword when the environment is upgraded.
base_estimator = tree.DecisionTreeClassifier(min_impurity_decrease=1e-01)
abc = AdaBoostClassifier(
    n_estimators=60,
    base_estimator=base_estimator,
    learning_rate=1,
)

# Stand-alone decision tree with a much smaller split threshold (1e-3).
clf = tree.DecisionTreeClassifier(min_impurity_decrease=1e-03)

# Multi-layer perceptron: three hidden layers (5, 7, 6 units), tanh activation,
# SGD solver, capped at 50 iterations.
MLP = MLPClassifier(
    hidden_layer_sizes=(5, 7, 6),
    activation='tanh',
    solver='sgd',
    max_iter=50,
)

# RBF-kernel SVM; probability=True is required so predict_proba is available.
svm = SVC(
    gamma='auto',
    kernel='rbf',
    probability=True,
)

# Load the data: a tab-separated file without a header row, so the four
# column names are supplied explicitly (three feature values plus the label).
raw_df = pd.read_csv(
    '../Skin_NonSkin.txt',
    delimiter='\t',
    names=["val1", "val2", "val3", "class"],
)
# Work on a copy so raw_df keeps the values exactly as read from disk.
cleaned_df = raw_df.copy()

# Clean the data.
# The frame only has the columns val1, val2, val3 and class; the earlier code
# popped 'label', 'address', 'year' and 'day', none of which exist here, and
# therefore raised KeyError.  The only cleaning needed is to turn the label
# into a binary 0/1 target, which the ROC code below relies on
# (pos_label=0 / pos_label=1 and predict_proba columns 0 / 1).
# NOTE(review): assumes the UCI Skin Segmentation encoding (1 = skin,
# 2 = non-skin); skin becomes the positive class 1 -- confirm this is the
# intended orientation.
cleaned_df['class'] = (cleaned_df['class'] == 1).astype(int)

# Hold out 20% of the rows for evaluation.  'class' stays the last column,
# which the feature extraction below (iloc[:, :-1]) depends on.
train_df, test_df = train_test_split(cleaned_df, test_size=0.2)

# Extract data for the tree-based models (clf, abc): every column except the
# last one ('class') is a feature, and the values are used unscaled.
X_train_for_tree = np.array(train_df.iloc[:, :-1])
y_train_for_tree = np.array(train_df['class'])
X_test_for_tree = np.array(test_df.iloc[:, :-1])
y_test_for_tree = np.array(test_df['class'])

# Extract data for the other algorithms (MLP, svm, neigh): same split, but the
# features are standardised.
y_train_for_others = np.array(train_df['class'])
X_train_for_others = np.array(train_df.iloc[:, :-1])
y_test_for_others = np.array(test_df['class'])
X_test_for_others = np.array(test_df.iloc[:, :-1])

# Fit the scaler on the training split only and reuse those statistics for the
# test split.  Re-fitting on the test data would standardise the two splits
# differently and leak test-set statistics into the evaluation.
scaler = StandardScaler()
X_train_for_others = scaler.fit_transform(X_train_for_others)
X_test_for_others = scaler.transform(X_test_for_others)

# Fit the five models.  The tree-based estimators (clf, abc) receive the raw
# features; MLP, svm and neigh receive the standardised ones.  The fitting
# order is kept as clf, MLP, abc, svm, neigh.
training_plan = (
    (clf, X_train_for_tree, y_train_for_tree),
    (MLP, X_train_for_others, y_train_for_others),
    (abc, X_train_for_tree, y_train_for_tree),
    (svm, X_train_for_others, y_train_for_others),
    (neigh, X_train_for_others, y_train_for_others),
)
for estimator, features, targets in training_plan:
    estimator.fit(features, targets)

# One figure per class, so all five models are drawn on shared axes.
compare_graph_0, axs_0 = plt.subplots(1, 1, figsize=(5, 5), sharey=True)
compare_graph_1, axs_1 = plt.subplots(1, 1, figsize=(5, 5), sharey=True)

models = [clf, MLP, abc, svm, neigh]
# Display names for the legend, in the same order as `models`.  The estimator
# objects themselves cannot be concatenated with a string.
model_names = ['clf', 'MLP', 'abc', 'svm', 'neigh']

for model_name, model in zip(model_names, models):
    # Evaluate each model on the same feature representation it was trained
    # on.  The comparison is by identity: comparing an estimator with the
    # string 'clf' is always False and would send every model down the
    # scaled-data branch.
    if model is clf or model is abc:
        X_test, y_test = X_test_for_tree, y_test_for_tree
    else:
        X_test, y_test = X_test_for_others, y_test_for_others

    # Column 0 / column 1 hold the predicted probability of class 0 / class 1.
    y_test_predict_score = model.predict_proba(X_test)
    fpr_1, tpr_1, _ = metrics.roc_curve(y_test, y_test_predict_score[:, 1], pos_label=1)
    fpr_0, tpr_0, _ = metrics.roc_curve(y_test, y_test_predict_score[:, 0], pos_label=0)

    # AUC for class 1 uses the labels as they are; for class 0 the labels are
    # flipped so that class 0 becomes the positive class.
    auc_1 = roc_auc_score(y_test, y_test_predict_score[:, 1])
    y_test_flip = (y_test == 0).astype(int)
    auc_0 = roc_auc_score(y_test_flip, y_test_predict_score[:, 0])

    # No fixed colour is passed: matplotlib's colour cycle gives every model
    # its own colour, so the five dashed curves can be told apart.
    axs_1.plot(
        fpr_1, tpr_1, linestyle="--",
        label='%s ROC curve of class 1(area = %0.2f)' % (model_name, auc_1),
    )
    axs_0.plot(
        fpr_0, tpr_0, linestyle="--",
        label='%s ROC curve of class 0(area = %0.2f)' % (model_name, auc_0),
    )
    
# Label, title, add the legend to, and save each comparison figure.
figure_specs = (
    (compare_graph_0, axs_0, "ROC curve of class 0", "5 algorithms: ROC_curves of class 0.png"),
    (compare_graph_1, axs_1, "ROC curve of class 1", "5 algorithms: ROC_curves of class 1.png"),
)
for figure, axes, title, output_name in figure_specs:
    axes.set_xlabel('False Positive Rate')
    axes.set_ylabel('True Positive Rate')
    figure.suptitle(title)
    axes.legend()
    figure.savefig(output_name)


