In [62]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
import pandas as pd
import seaborn as sns
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn import neighbors, datasets
from sklearn.model_selection import cross_val_score
from sklearn.datasets import make_blobs
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from scipy.spatial import ConvexHull
from tqdm import tqdm
import random
plt.style.use('ggplot')
import pickle
from sklearn import tree
from sklearn.tree import export_graphviz
from joblib import dump, load
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
%matplotlib inline

In [51]:
def getAuc(X,y,test_size=0.25,max_depth=None,n_estimators=100,
           minsplit=4,FPR=[],TPR=[],VERBOSE=False, USE_ONLY=None):
    '''
        Fit a panel of classifiers on a random train/test split of (X, y) and
        return the area under the upper convex hull of their pooled ROC points.

        Parameters
        ----------
        X, y : training data and target labels, forwarded to train_test_split.
        test_size : fraction of the samples held out for evaluation.
        max_depth, n_estimators, minsplit : hyper-parameters forwarded to the
            classifiers that accept them (minsplit -> min_samples_split).
        FPR, TPR : previously collected ROC points to pool with the new ones.
            They are never mutated (np.append returns a new array), so the
            list defaults are not shared state.
        VERBOSE : if true, print each classifier's own AUC.
        USE_ONLY : None to use the whole panel, a list/tuple of indices into
            the panel, or a single integer index.

        Returns
        -------
        (auc, classifiers) : the hull AUC and the list of fitted classifiers
        that were used (always a list, also for an integer USE_ONLY).

        Notes
        -----
        The ROC curve is built from column 1 of predict_proba with
        pos_label=1 -- this assumes a binary problem whose positive class is
        labelled 1 (TODO confirm against callers).
        scipy's ConvexHull raises on degenerate input (e.g. all pooled points
        collinear); that error is propagated unchanged.
    '''
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
    CLASSIFIERS=[DecisionTreeClassifier(max_depth=max_depth, min_samples_split=minsplit),
                RandomForestClassifier(n_estimators=n_estimators,
                                       max_depth=max_depth,min_samples_split=minsplit),
                ExtraTreesClassifier(n_estimators=n_estimators,
                                     max_depth=max_depth,min_samples_split=minsplit),
                AdaBoostClassifier(n_estimators=n_estimators),
                GradientBoostingClassifier(n_estimators=n_estimators,max_depth=max_depth),
                svm.SVC(kernel='rbf',gamma='scale',class_weight='balanced',probability=True)]

    if USE_ONLY is not None:
        if isinstance(USE_ONLY, (list, tuple)):
            CLASSIFIERS=[CLASSIFIERS[i] for i in USE_ONLY]
        elif isinstance(USE_ONLY, int):
            # Keep a one-element list: the loop below iterates over
            # CLASSIFIERS, and a bare unfitted estimator cannot be iterated.
            CLASSIFIERS=[CLASSIFIERS[USE_ONLY]]

    for clf in CLASSIFIERS:
        clf.fit(X_train,y_train)
        y_pred=clf.predict_proba(X_test)
        fpr, tpr, _ = metrics.roc_curve(y_test,y_pred[:,1], pos_label=1)
        if VERBOSE:
            print(metrics.auc(fpr, tpr))

        # Pool this classifier's ROC points with everything collected so far.
        FPR=np.append(FPR,fpr)
        TPR=np.append(TPR,tpr)

    points=np.array(list(zip(FPR,TPR)))
    hull_points=points[ConvexHull(points).vertices]
    # Only the upper chain of the hull is the ROC convex hull. Vertices below
    # the diagonal belong to the lower chain; mixing them in makes the
    # trapezoid integral zig-zag between the two chains.
    hull_points=hull_points[hull_points[:,1]>=hull_points[:,0]]
    # Sort by FPR, breaking ties by TPR, so vertical segments (e.g. at FPR=0)
    # are traversed bottom-up instead of in arbitrary order.
    order=np.lexsort((hull_points[:,1],hull_points[:,0]))
    auc=metrics.auc(hull_points[order,0],hull_points[order,1])
    return auc,CLASSIFIERS

In [87]:
def getConfusion(X,y,test_size=0.25,max_depth=None,n_estimators=100,
           minsplit=4,CONFUSION=None,VERBOSE=False, USE_ONLY=None,target_names = None):
    '''
        Fit a panel of classifiers on a random train/test split of (X, y) and
        collect the confusion matrix of each one on the held-out part.

        Parameters
        ----------
        X, y : training data and target labels, forwarded to train_test_split.
        test_size : fraction of the samples held out for evaluation.
        max_depth, n_estimators, minsplit : hyper-parameters forwarded to the
            classifiers that accept them (minsplit -> min_samples_split).
        CONFUSION : optional dict to add the results to (updated in place).
            When omitted, a fresh dict is created on every call, so results
            no longer leak from one call into the next.
        VERBOSE : if true, also print the confusion matrix and the accuracy.
        USE_ONLY : None to use the whole panel, a list/tuple of indices into
            the panel, or a single integer index.
        target_names : display names forwarded to classification_report.

        Returns
        -------
        dict mapping each fitted classifier instance to its confusion matrix.

        The classification report of every classifier is printed regardless
        of VERBOSE.
    '''
    if CONFUSION is None:
        CONFUSION={}

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
    CLASSIFIERS=[DecisionTreeClassifier(max_depth=max_depth, min_samples_split=minsplit),
                RandomForestClassifier(n_estimators=n_estimators,
                                       max_depth=max_depth,min_samples_split=minsplit),
                ExtraTreesClassifier(n_estimators=n_estimators,
                                     max_depth=max_depth,min_samples_split=minsplit),
                AdaBoostClassifier(n_estimators=n_estimators),
                GradientBoostingClassifier(n_estimators=n_estimators,max_depth=max_depth),
                svm.SVC(kernel='rbf',gamma='scale',class_weight='balanced',probability=True)]

    if USE_ONLY is not None:
        if isinstance(USE_ONLY, (list, tuple)):
            CLASSIFIERS=[CLASSIFIERS[i] for i in USE_ONLY]
        elif isinstance(USE_ONLY, int):
            # Keep a one-element list: the loop below iterates over
            # CLASSIFIERS, and a bare unfitted estimator cannot be iterated.
            CLASSIFIERS=[CLASSIFIERS[USE_ONLY]]

    for clf in CLASSIFIERS:
        clf.fit(X_train,y_train)
        y_pred=clf.predict(X_test)
        cmat=confusion_matrix(y_test, y_pred)
        print(classification_report(y_test, y_pred, target_names=target_names))
        # Results are keyed by the fitted estimator object itself.
        CONFUSION[clf]=cmat

        if VERBOSE:
            print('Confusion MAtrix:\n', cmat)
            print(' ')
            print('Accuracy:', accuracy_score(y_test, y_pred))

    return CONFUSION

In [88]:
# Whitespace-separated table without a header row; column 0 becomes the index.
# The separator is a raw string so that `\s` reaches the regex engine instead
# of being parsed as an (invalid) Python string escape.
df=pd.read_csv('PSYCHO.DAT',header=None,index_col=0,sep=r'\s+')
# Keep only the rows whose label (column 1) is positive.
df=df[df[1]>0]
# Columns 2 and onward are the features; column 1 is the target, as strings.
X=df.loc[:,2:].values
y=df.loc[:,1].values.astype(str)

In [93]:
# Evaluate only panel entry 2 on a 30% hold-out, with verbose reporting.
CONF=getConfusion(
    X,
    y,
    test_size=0.3,
    max_depth=None,
    n_estimators=1000,
    minsplit=4,
    VERBOSE=True,
    USE_ONLY=[2],
    # Display names handed to the classification report.
    target_names=[
        'schizophrenic',
        'schizoaffective',
        'depressed',
        'bipolar',
    ],
)

                 precision    recall  f1-score   support

  schizophrenic       0.87      0.99      0.92       160
schizoaffective       0.00      0.00      0.00        15
      depressed       0.60      0.46      0.52        13
        bipolar       0.50      0.17      0.25         6

      micro avg       0.85      0.85      0.85       194
      macro avg       0.49      0.40      0.42       194
   weighted avg       0.77      0.85      0.80       194

Confusion MAtrix:
 [[158   0   2   0]
 [ 14   0   1   0]
 [  6   0   6   1]
 [  4   0   1   1]]
 
Accuracy: 0.8505154639175257


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [None]:
# Show the feature matrix converted to strings; the result is not assigned,
# so X itself is left unchanged.
X.astype(str)

In [68]:
# Echo the label array built in the data-loading cell.
y

array(['1', '1', '1', '1', '3', '1', '1', '1', '1', '1', '1', '1', '1',
       '1', '1', '2', '1', '1', '1', '1', '2', '2', '1', '1', '3', '1',
       '3', '4', '1', '1', '1', '1', '1', '1', '1', '1', '1', '2', '1',
       '1', '1', '2', '1', '1', '1', '2', '4', '1', '0', '0', '2', '1',
       '1', '1', '1', '2', '2', '1', '1', '1', '1', '1', '1', '1', '1',
       '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1',
       '2', '1', '1', '1', '1', '3', '1', '1', '1', '1', '1', '1', '1',
       '2', '1', '1', '1', '2', '1', '1', '1', '3', '3', '3', '3', '3',
       '3', '4', '3', '3', '3', '1', '3', '2', '1', '1', '3', '1', '1',
       '3', '1', '1', '1', '1', '1', '1', '3', '1', '1', '1', '1', '1',
       '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '2', '1',
       '1', '1', '1', '3', '1', '1', '1', '1', '1', '1', '1', '3', '4',
       '1', '1', '1', '1', '1', '1', '3', '1', '1', '1', '1', '1', '3',
       '3', '3', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1