In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from imblearn.over_sampling import SMOTE


data = 'diabetes_01_health_indicators.csv'
df = pd.read_csv(data)
# df_sampled = df.sample(n=200000, random_state=42)
df_sampled = df
D = df_sampled.drop('Diabetes_binary', axis=1).values
L = df_sampled['Diabetes_binary'].values
sm = SMOTE(random_state=42)
D_bal, L_bal = sm.fit_resample(D, L)

In [322]:
D_tr, D_tst, L_tr, L_tst = train_test_split(D_bal, L_bal, test_size=0.2, stratify=L_bal, random_state=42)

scaler = StandardScaler()
D_tr = scaler.fit_transform(D_tr)
D_tst = scaler.transform(D_tst)

In [323]:
# Convert the NumPy splits into float32 tensors. Labels are additionally
# reshaped with view(-1, 1) so they become single-column matrices.
D_tr, D_tst = (torch.tensor(features, dtype=torch.float32) for features in (D_tr, D_tst))

L_tr, L_tst = (
    torch.tensor(labels, dtype=torch.float32).view(-1, 1) for labels in (L_tr, L_tst)
)

In [324]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Move all four splits onto the selected device.
D_tr, L_tr, D_tst, L_tst = (split.to(device) for split in (D_tr, L_tr, D_tst, L_tst))

In [325]:
class SimpleNN(nn.Module):
    """Single-hidden-layer binary classifier: Linear -> Sigmoid -> Linear -> Sigmoid.

    Args:
        in_features: Number of input features. When omitted, the width is read
            from the notebook-level training tensor ``D_tr`` (``D_tr.shape[1]``),
            which is what the zero-argument constructor has always done.
    """

    def __init__(self, in_features=None):
        super().__init__()
        if in_features is None:
            # Backward-compatible default: infer the width from the global training matrix.
            in_features = D_tr.shape[1]
        self.model = nn.Sequential(
            nn.Linear(in_features, 16),
            nn.Sigmoid(),
            nn.Linear(16, 1),
            nn.Sigmoid()  # squashes the single output into [0, 1]
        )

    def forward(self, x):
        """Run ``x`` through the sequential stack; the result has one value per row."""
        return self.model(x)

In [326]:
class DeepNN(nn.Module):
    """Five-layer binary classifier with mixed Sigmoid/ReLU activations and dropout.

    Layer widths are in_features -> 32 -> 32 -> 16 -> 16 -> 1, with a final
    Sigmoid so the output lies in [0, 1].

    Args:
        in_features: Number of input features. When omitted, the width is read
            from the notebook-level training tensor ``D_tr`` (``D_tr.shape[1]``),
            which is what the zero-argument constructor has always done.
    """

    # NOTE(review): the run recorded in this notebook shows this model reaching a
    # training loss of about 0.38 but only ~0.51 test accuracy. The cause is not
    # established; the train/eval difference introduced by the Dropout layers is
    # one candidate to investigate.

    def __init__(self, in_features=None):
        super().__init__()
        if in_features is None:
            # Backward-compatible default: infer the width from the global training matrix.
            in_features = D_tr.shape[1]
        self.model = nn.Sequential(
            nn.Linear(in_features, 32),
            nn.Sigmoid(),
            nn.Dropout(0.3),
            nn.Linear(32, 32),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, 16),
            nn.Sigmoid(),
            nn.Linear(16, 16),
            nn.ReLU(),
            nn.Linear(16, 1),
            nn.Sigmoid()  # squashes the single output into [0, 1]
        )

    def forward(self, x):
        """Run ``x`` through the sequential stack; the result has one value per row."""
        return self.model(x)

In [327]:
def train(model, D_tr, L_tr, D_tst, L_tst, lr=0.01, epochs=5000):
    """Train ``model`` with full-batch Adam on binary cross-entropy, then score it.

    Every epoch performs a single gradient step on the whole training set.
    After training, the model is switched to eval mode and its test outputs
    are thresholded at 0.5 to obtain class predictions.

    Args:
        model: ``nn.Module`` whose forward pass returns values in [0, 1]
            (required by ``nn.BCELoss``).
        D_tr: Training features tensor.
        L_tr: Training targets tensor, same shape as ``model(D_tr)``.
        D_tst: Test features tensor.
        L_tst: Test targets tensor.
        lr: Learning rate passed to Adam.
        epochs: Number of full-batch optimisation steps.

    Returns:
        Tuple ``(accuracy, classification_report, confusion_matrix)`` as
        produced by the corresponding scikit-learn functions.
    """
    model = model.to(device)
    # The model was just moved to `device`; move the data too so the function
    # does not depend on the caller having done it (no-op if already there).
    D_tr, L_tr = D_tr.to(device), L_tr.to(device)
    D_tst, L_tst = D_tst.to(device), L_tst.to(device)

    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Nothing inside the loop leaves training mode, so set it once up front.
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = model(D_tr)
        loss = criterion(outputs, L_tr)
        loss.backward()
        optimizer.step()

        # Log every 500th step and the second step (as before), plus the final
        # step so the last reported loss belongs to the model that is evaluated.
        if epoch % 500 == 0 or epoch == 1 or epoch == epochs - 1:
            print(f"Epoch {epoch+1}/{epochs} - Loss: {loss.item():.8f}")

    model.eval()
    with torch.no_grad():
        y_pred = model(D_tst)
    y_pred_class = (y_pred > 0.5).float()

    # Transfer to host memory once and hand plain NumPy arrays to scikit-learn.
    y_true = L_tst.cpu().numpy()
    y_hat = y_pred_class.cpu().numpy()
    acc = accuracy_score(y_true, y_hat)
    classificationReport = classification_report(y_true, y_hat)
    confusionMatrix = confusion_matrix(y_true, y_hat)

    print(f"Final Accuracy: {acc:.4f}")
    return acc, classificationReport, confusionMatrix


In [328]:
# Seed PyTorch before the models are built so that weight initialisation and
# dropout masks are repeatable when this cell is re-run (some CUDA kernels may
# still introduce small run-to-run differences).
torch.manual_seed(42)

simpleNN_model = SimpleNN()
deepNN_model = DeepNN()

print("Training Simple Neural Network...")
acc_simple, classificationReport_simple, confusionMatrix_simple = train(simpleNN_model, D_tr, L_tr, D_tst, L_tst)

print("\nTraining Deep Neural Network...")
acc_deep, classificationReport_deep, confusionMatrix_deep = train(deepNN_model, D_tr, L_tr, D_tst, L_tst)

Training Simple Neural Network...
Epoch 1/5000 - Loss: 0.70085013
Epoch 2/5000 - Loss: 0.68936050
Epoch 501/5000 - Loss: 0.40199226
Epoch 1001/5000 - Loss: 0.34162593
Epoch 1501/5000 - Loss: 0.32627851
Epoch 2001/5000 - Loss: 0.31994107
Epoch 2501/5000 - Loss: 0.31622767
Epoch 3001/5000 - Loss: 0.31357539
Epoch 3501/5000 - Loss: 0.30541039
Epoch 4001/5000 - Loss: 0.30026719
Epoch 4501/5000 - Loss: 0.29686347
Final Accuracy: 0.8555

Training Deep Neural Network...
Epoch 1/5000 - Loss: 0.69311523
Epoch 2/5000 - Loss: 0.69276953
Epoch 501/5000 - Loss: 0.46347407
Epoch 1001/5000 - Loss: 0.42134956
Epoch 1501/5000 - Loss: 0.40705109
Epoch 2001/5000 - Loss: 0.39402795
Epoch 2501/5000 - Loss: 0.39039010
Epoch 3001/5000 - Loss: 0.38784999
Epoch 3501/5000 - Loss: 0.38758001
Epoch 4001/5000 - Loss: 0.38472173
Epoch 4501/5000 - Loss: 0.38270065
Final Accuracy: 0.5075


In [329]:
def _show_results(accuracy_line, report_heading, report, matrix_heading, matrix):
    """Print one model's accuracy line, classification report and confusion matrix."""
    print(accuracy_line)
    print(report_heading, report)
    print(matrix_heading, matrix)


# Summary for the shallow network.
_show_results(
    f"Simple NN Accuracy: {(acc_simple):.4f}",
    "Simple NN Classification Report:\n", classificationReport_simple,
    "Simple NN Confusion Matrix:\n", confusionMatrix_simple,
)
# Visual gap between the two summaries.
print("\n")
# Summary for the deep network.
_show_results(
    f"Deep NN Accuracy: {(acc_deep):.4f}",
    "Deep NN Classification Report:\n", classificationReport_deep,
    "Deep NN Confusion Matrix:\n", confusionMatrix_deep,
)

Simple NN Accuracy: 0.8555
Simple NN Classification Report:
               precision    recall  f1-score   support

         0.0       0.83      0.90      0.86     43667
         1.0       0.89      0.81      0.85     43667

    accuracy                           0.86     87334
   macro avg       0.86      0.86      0.86     87334
weighted avg       0.86      0.86      0.86     87334

Simple NN Confusion Matrix:
 [[39207  4460]
 [ 8161 35506]]


Deep NN Accuracy: 0.5075
Deep NN Classification Report:
               precision    recall  f1-score   support

         0.0       0.88      0.02      0.03     43667
         1.0       0.50      1.00      0.67     43667

    accuracy                           0.51     87334
   macro avg       0.69      0.51      0.35     87334
weighted avg       0.69      0.51      0.35     87334

Deep NN Confusion Matrix:
 [[  760 42907]
 [  108 43559]]
