## Compare the Performance of Classifiers Using ROC Curves
**(Binary Classification)**


**Required Python Libraries**

- pandas
- numpy
- scikit-learn
- matplotlib

### Import libraries for the analysis 

In [None]:
#data exploration and preprocessing
import pandas as pd        
import numpy as np 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

#data visualization
import matplotlib.pyplot as plt

#classifiers
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

#model evaluation
from sklearn.metrics import roc_curve,auc



### Read the data

In [None]:
# Location of the Pima diabetes CSV (fetched over HTTP when it is read).
data_file = 'https://raw.githubusercontent.com/vappiah/Machine-Learning-Tutorials/main/data/diabetes-pima.csv'

In [None]:
# Load the CSV at `data_file` into a pandas DataFrame.
dataframe = pd.read_csv(data_file)

### Data Preprocessing

In [None]:
# Every column except the last goes into the feature matrix X;
# the last column is kept as the label vector y.
X, y = dataframe.iloc[:, :-1], dataframe.iloc[:, -1]

### Split data into train and test sets

In [None]:
# train test ratio 80:20

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing.
# stratify=y keeps the class proportions the same in the train and test sets,
# so the ROC curves are not distorted by an unlucky split.
# random_state pins the shuffle so the same split is produced on every run.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

### Normalization of the data

In [None]:
# scale data between 0 and 1

In [None]:
# Learn the per-feature min/max from the training set only, then apply that
# same mapping to the test set. Re-fitting the scaler on the test set would
# scale the two sets differently and leak test-set statistics into the
# preprocessing step.
min_max_scaler = MinMaxScaler()
Xtrain = min_max_scaler.fit_transform(Xtrain)
# transform (not fit_transform): reuse the training min/max. Test values may
# fall slightly outside [0, 1] when they exceed the training range.
Xtest = min_max_scaler.transform(Xtest)

### Training Phase

In [None]:
# Random forest classifier, built with max_features=0.2 and fitted on the
# training split.
rf = RandomForestClassifier(max_features=0.2)
rf.fit(Xtrain, ytrain)

In [None]:
# Linear support vector machine with default settings, fitted on the
# training split.
svm = LinearSVC()
svm.fit(Xtrain, ytrain)

In [None]:
# K-nearest-neighbours classifier with default settings, fitted on the
# training split.
knn = KNeighborsClassifier()
knn.fit(Xtrain, ytrain)

### Plot ROC Curve for each classifier

In [None]:

def _positive_class_scores(name, model, xtest):
    """Return one continuous score per row of ``xtest`` for use with ``roc_curve``.

    ``decision_function`` is preferred when the model has it; otherwise
    column 1 of ``predict_proba`` is used.

    Raises:
        TypeError: if the model exposes neither method, since no ROC curve
            can be drawn from it.
    """
    if hasattr(model, 'decision_function'):
        return model.decision_function(xtest)
    if hasattr(model, 'predict_proba'):
        # Column 1 is the probability of the second class in the model's
        # class ordering (the positive class when labels are 0/1).
        return model.predict_proba(xtest)[:, 1]
    # Fail loudly: otherwise the scores of the previous model would be reused
    # silently (or an unrelated unbound-variable error would surface).
    raise TypeError(
        'model %r has neither decision_function nor predict_proba; '
        'cannot compute ROC scores' % (name,)
    )


def plot_roc(xtest, ytest, models):
    """Plot the ROC curves of several fitted binary classifiers on one figure.

    Args:
        xtest: test features, passed unchanged to each model's scoring method.
        ytest: true labels for ``xtest``.
        models: dict mapping a display name to a fitted model object. The
            name is used as the legend label.

    For each model the AUC is printed and shown in the legend. A dashed
    diagonal is drawn as the chance-level reference, and the figure is shown
    with ``plt.show()``.

    Raises:
        TypeError: if a model has neither ``decision_function`` nor
            ``predict_proba``.
    """
    for name, model in models.items():
        scores = _positive_class_scores(name, model, xtest)
        # The thresholds returned by roc_curve are not needed for plotting.
        fpr, tpr, _ = roc_curve(ytest, scores)
        roc_auc = auc(fpr, tpr)
        print('ROC AUC=%0.2f' % roc_auc)
        plt.plot(fpr, tpr, label='%s (AUC=%0.2f)' % (name, roc_auc))

    plt.legend(loc='lower right')
    # Chance-level diagonal (it has no label, so it is not in the legend).
    plt.plot([0, 1], [0, 1], 'b--')
    plt.xlim([0, 1])
    # Small headroom above 1 so curves touching TPR=1 are not clipped.
    plt.ylim([0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.show()


In [None]:
# Map a legend label to each fitted classifier, then pass them all to
# plot_roc together with the test split.
models = dict(rf=rf, svm=svm, knn=knn)
plot_roc(Xtest, ytest, models)