<a href="https://colab.research.google.com/github/vaibhavlokunde/AI-ML/blob/main/DiabetesPredictionModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch import nn
import pandas as pd
import matplotlib.pyplot as plt
import sklearn

In [2]:
df=pd.read_csv('diabetes.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
df.isnull().sum()

Unnamed: 0,0
Pregnancies,0
Glucose,0
BloodPressure,0
SkinThickness,0
Insulin,0
BMI,0
DiabetesPedigreeFunction,0
Age,0
Outcome,0


In [6]:
X=df.drop('Outcome',axis=1).values
y=df['Outcome'].values
X[:10],y[:10]

(array([[6.000e+00, 1.480e+02, 7.200e+01, 3.500e+01, 0.000e+00, 3.360e+01,
         6.270e-01, 5.000e+01],
        [1.000e+00, 8.500e+01, 6.600e+01, 2.900e+01, 0.000e+00, 2.660e+01,
         3.510e-01, 3.100e+01],
        [8.000e+00, 1.830e+02, 6.400e+01, 0.000e+00, 0.000e+00, 2.330e+01,
         6.720e-01, 3.200e+01],
        [1.000e+00, 8.900e+01, 6.600e+01, 2.300e+01, 9.400e+01, 2.810e+01,
         1.670e-01, 2.100e+01],
        [0.000e+00, 1.370e+02, 4.000e+01, 3.500e+01, 1.680e+02, 4.310e+01,
         2.288e+00, 3.300e+01],
        [5.000e+00, 1.160e+02, 7.400e+01, 0.000e+00, 0.000e+00, 2.560e+01,
         2.010e-01, 3.000e+01],
        [3.000e+00, 7.800e+01, 5.000e+01, 3.200e+01, 8.800e+01, 3.100e+01,
         2.480e-01, 2.600e+01],
        [1.000e+01, 1.150e+02, 0.000e+00, 0.000e+00, 0.000e+00, 3.530e+01,
         1.340e-01, 2.900e+01],
        [2.000e+00, 1.970e+02, 7.000e+01, 4.500e+01, 5.430e+02, 3.050e+01,
         1.580e-01, 5.300e+01],
        [8.000e+00, 1.250e+02, 9.600e

In [9]:
if torch.cuda.is_available():
    device='cuda'
else:
    device='cpu'
device

'cuda'

In [12]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,
                                               test_size=0.2,
                                               random_state=42)

In [8]:
len(X_train),len(X_test),len(y_train),len(y_test)

(614, 154, 614, 154)

In [13]:
X_train=torch.from_numpy(X_train).type(torch.float).to(device)
X_test=torch.from_numpy(X_test).type(torch.float).to(device)
y_train=torch.from_numpy(y_train).type(torch.LongTensor).to(device)
y_test=torch.from_numpy(y_test).type(torch.LongTensor).to(device)

In [57]:
class DiabetesModel(nn.Module):
  def __init__(self,in_features=8,hidden_units=20,out_features=2):
    super().__init__()
    self.layer_1=nn.Linear(in_features,hidden_units)
    self.relu=nn.ReLU()
    self.layer_2=nn.Linear(hidden_units,out_features)

  def forward(self,x):
    x=self.layer_1(x)
    x=self.relu(x)
    x=self.layer_2(x)
    return x

In [58]:
torch.manual_seed(42)
model_1=DiabetesModel().to(device)
model_1

DiabetesModel(
  (layer_1): Linear(in_features=8, out_features=20, bias=True)
  (relu): ReLU()
  (layer_2): Linear(in_features=20, out_features=2, bias=True)
)

In [59]:
model_1.parameters()

<generator object Module.parameters at 0x78561e7120a0>

In [60]:
loss_fn=nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(params=model_1.parameters(),lr=0.01)

In [61]:
Y_logits=model_1(X_train).squeeze()
Y_logits[:20]

tensor([[ 2.0520, -3.2989],
        [ 3.8031, -8.2119],
        [ 5.7129, -3.4677],
        [ 2.8131, -7.5032],
        [28.0070, -2.1213],
        [ 7.3969, -4.5890],
        [ 3.5126, -5.5837],
        [10.3043, -7.1967],
        [16.0019, -4.4554],
        [ 0.1251, -4.7525],
        [ 8.9045, -5.1151],
        [ 2.4924, -7.5017],
        [ 6.2829, -5.3899],
        [ 8.9639, -6.5875],
        [ 7.7976, -5.0719],
        [ 2.0531, -6.1288],
        [ 2.4200, -7.8588],
        [ 8.1856, -5.0048],
        [ 1.5125, -6.2760],
        [20.5503, -3.9864]], device='cuda:0', grad_fn=<SliceBackward0>)

In [62]:
def accuracy(y_true,y_pred):
  correct=torch.eq(y_true,y_pred).sum().item()
  acc=correct/len(y_pred)
  return acc

In [63]:
#Training loop

epochs=100

for epoch in range(epochs):
  model_1.train()

  y_logits=model_1(X_train).squeeze()

  loss=loss_fn(y_logits,y_train)

  optimizer.zero_grad()

  loss.backward()

  optimizer.step()

  model_1.eval()
  with torch.inference_mode():
    test_logits=model_1(X_test).squeeze()
    test_pred=torch.softmax(test_logits,dim=1).argmax(dim=1)
    test_loss=loss_fn(test_logits,y_test)
    test_acc=accuracy(y_test,test_pred)

  if epoch%10==0:
    print(f"Epoch: {epoch} | Loss: {loss:.5f} | Test Loss: {test_loss:.5f} | Test Accuracy: {test_acc:.2f}%")


Epoch: 0 | Loss: 5.39746 | Test Loss: 2.39459 | Test Accuracy: 0.64%
Epoch: 10 | Loss: 1.05018 | Test Loss: 0.88024 | Test Accuracy: 0.51%
Epoch: 20 | Loss: 0.83580 | Test Loss: 0.72567 | Test Accuracy: 0.64%
Epoch: 30 | Loss: 0.72388 | Test Loss: 0.72030 | Test Accuracy: 0.62%
Epoch: 40 | Loss: 0.68463 | Test Loss: 0.71965 | Test Accuracy: 0.58%
Epoch: 50 | Loss: 0.64405 | Test Loss: 0.66199 | Test Accuracy: 0.65%
Epoch: 60 | Loss: 0.62153 | Test Loss: 0.65438 | Test Accuracy: 0.65%
Epoch: 70 | Loss: 0.60317 | Test Loss: 0.64103 | Test Accuracy: 0.67%
Epoch: 80 | Loss: 0.59036 | Test Loss: 0.63100 | Test Accuracy: 0.72%
Epoch: 90 | Loss: 0.57960 | Test Loss: 0.62562 | Test Accuracy: 0.71%


In [71]:
model_1.eval()
with torch.inference_mode():
  y_logits=model_1(X_test).squeeze()
  y_test_pred=torch.softmax(y_logits,dim=1).argmax(dim=1)

In [70]:
from sklearn.metrics import confusion_matrix
# Move y_test and y_test_pred to CPU before converting to NumPy arrays
cm = confusion_matrix(y_test.cpu(), y_test_pred.cpu())
cm

array([[79, 20],
       [21, 34]])

In [None]:
#So 79 and 34 are my right results and 20,21 are my wrong results