In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from collections import Counter
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer
from sklearn.metrics import classification_report
from tensorflow.keras.utils import to_categorical
import numpy as np

data = pd.read_csv("diabetes.csv")
y = data['Outcome']
x = data[['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']]


print(data['Glucose'].median())

#Replace 0 with Mean Value
glucose_mean = data['Glucose'].median()
blood_pressure_mean = data['BloodPressure'].median()
skin_mean = data['SkinThickness'].median()
insulin_mean = data['Insulin'].median()
bmi_mean = data['BMI'].median()
def clean_glucose(x):
    return glucose_mean if x==0 else x
def clean_bloodpressure(x):
    return blood_pressure_mean if x==0 else x
def clean_skin(x):
    return skin_mean if x==0 else x
def clean_insulin(x):
    return insulin_mean if x==0 else x
def clean_bmi(x):
    return bmi_mean if x==0 else x

data['Glucose'] = data['Glucose'].apply(clean_glucose)
data['BloodPressure'] = data['BloodPressure'].apply(clean_bloodpressure)
data['SkinThickness'] = data['SkinThickness'].apply(clean_skin)
data['Insulin'] = data['Insulin'].apply(clean_insulin)
data['BMI'] = data['BMI'].apply(clean_bmi)
print(data.head())


117.0
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6    148.0           72.0           35.0     30.5  33.6   
1            1     85.0           66.0           29.0     30.5  26.6   
2            8    183.0           64.0           23.0     30.5  23.3   
3            1     89.0           66.0           23.0     94.0  28.1   
4            0    137.0           40.0           35.0    168.0  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  


In [2]:
#Convert categorical variable into dummy/indicator variables.
x = pd.get_dummies(x)
x_train,x_try,y_train,y_try = train_test_split(x,y,train_size=0.8,random_state=42)
x_test,x_val,y_test,y_val = train_test_split(x_try,y_try,train_size = 0.5, random_state=42)
#scaling and standardizing our training and test data.
ct = ColumnTransformer([("numeric", StandardScaler(),['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age'])])
x_train = ct.fit_transform(x_train)
x_test = ct.fit_transform(x_test)
le = LabelEncoder()
y_train = to_categorical(le.fit_transform(y_train))
y_test = to_categorical(le.fit_transform(y_test))

In [3]:
#Build model
model = Sequential()
model.add(Dense(64,input_dim=8,activation='relu'))
model.add(Dense(32,activation='relu'))
model.add(Dense(2,activation='softmax'))
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
model.fit(x_train,y_train,epochs=11,batch_size=12)

y_estimate = np.argmax(model.predict(x_test),axis=1)
y_true = np.argmax(y_test,axis=1)
print(classification_report(y_true,y_estimate))

Epoch 1/11
Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
              precision    recall  f1-score   support

           0       0.81      0.81      0.81        52
           1       0.60      0.60      0.60        25

    accuracy                           0.74        77
   macro avg       0.70      0.70      0.70        77
weighted avg       0.74      0.74      0.74        77



In [4]:
x_val = ct.transform(x_val)
y_val = to_categorical(le.transform(y_val))
y_estimate2 = np.argmax(model.predict(x_val),axis=1)
y_true2 = np.argmax(y_val,axis=1)
print(classification_report(y_true2,y_estimate2))

              precision    recall  f1-score   support

           0       0.78      0.85      0.82        47
           1       0.73      0.63      0.68        30

    accuracy                           0.77        77
   macro avg       0.76      0.74      0.75        77
weighted avg       0.76      0.77      0.76        77

