In [63]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [64]:
dataset = pd.read_csv('./Dataset/diabetes.csv')

In [65]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


In [66]:
dataset.isnull().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

In [67]:
dataset.duplicated().sum()

0

In [68]:
dataset.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [69]:
X = dataset.iloc[:,:-1].values
Y = dataset.iloc[:,-1].values

In [70]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 0)

In [71]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [72]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score

In [73]:
lrc = LogisticRegression()
svc = SVC(kernel='linear')
gnb = GaussianNB()
dtc = DecisionTreeClassifier(criterion= 'entropy')
knc = KNeighborsClassifier(n_neighbors = 5, metric='minkowski', p = 2)
rfc = RandomForestClassifier(n_estimators = 50, criterion= 'entropy')

In [74]:
clfs = {
    'LR': lrc, 
    'SVC' : svc,
    'NB': gnb, 
    'DT': dtc, 
    'KN' : knc, 
    'RF': rfc,
}

In [75]:
def train_classifier(clf, X_train, Y_train, X_test, Y_test):
    clf.fit(X_train,Y_train)
    y_pred = clf.predict(X_test)
    print(clf.predict(sc.transform([[5,166,72,19,175,25.8,0.587,51]])),"predicted Value")
    accuracy = accuracy_score(Y_test,y_pred)
    precision = precision_score(Y_test,y_pred)
    return accuracy, precision

In [76]:
accuracy_scores = []
precision_scores = []

for name,clf in clfs.items():
    current_accuracy,current_precision = train_classifier(clf, X_train,Y_train,X_test,Y_test)
    print("For name" , name)
    print("For Accuracy" , current_accuracy)
    print("For Precision" , current_precision)
    accuracy_scores.append(current_accuracy)
    precision_scores.append(current_precision)

[1] predicted Value
For name LR
For Accuracy 0.8246753246753247
For Precision 0.7631578947368421
[1] predicted Value
For name SVC
For Accuracy 0.8246753246753247
For Precision 0.7631578947368421
[1] predicted Value
For name NB
For Accuracy 0.7922077922077922
For Precision 0.6744186046511628
[1] predicted Value
For name DT
For Accuracy 0.7207792207792207
For Precision 0.5333333333333333
[1] predicted Value
For name KN
For Accuracy 0.7987012987012987
For Precision 0.6818181818181818
[1] predicted Value
For name RF
For Accuracy 0.8311688311688312
For Precision 0.7333333333333333


In [77]:
performance_df = pd.DataFrame({'Algorithm': clfs.keys(), "Accuracy" : accuracy_scores, "Precision" : precision_scores})

In [78]:
performance_df

Unnamed: 0,Algorithm,Accuracy,Precision
0,LR,0.824675,0.763158
1,SVC,0.824675,0.763158
2,NB,0.792208,0.674419
3,DT,0.720779,0.533333
4,KN,0.798701,0.681818
5,RF,0.831169,0.733333
