# PyTorch - v03 - Pima Diabetes

This dataset describes the medical records of Pima Indian patients
and whether or not each patient will have an onset of diabetes within
five years.

Field descriptions follow:

preg = Number of times pregnant

plas = Plasma glucose concentration at 2 hours in an oral glucose tolerance test

pres = Diastolic blood pressure (mm Hg)

skin = Triceps skin fold thickness (mm)

test = 2-Hour serum insulin (mu U/ml)

mass = Body mass index (weight in kg/(height in m)^2)

pedi = Diabetes pedigree function

age = Age (years)

class = Class variable (1: tested positive for diabetes, 0: tested negative for diabetes)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# Simple feed-forward neural network
class SimpleNN(nn.Module):
    """Fully connected classifier with two ReLU-activated hidden layers.

    Args:
        input_features: size of each input vector.
        hidden1: width of the first hidden layer.
        hidden2: width of the second hidden layer.
        out_features: number of values produced by the output layer.
    """

    def __init__(self, input_features=8, hidden1=20, hidden2=10, out_features=2):
        super().__init__()
        # Layers are registered in input -> output order.
        self.f_connected1 = nn.Linear(input_features, hidden1)
        self.f_connected2 = nn.Linear(hidden1, hidden2)
        self.out = nn.Linear(hidden2, out_features)

    def forward(self, x):
        """Run ``x`` through both hidden layers and return the output layer's raw values."""
        hidden = x
        for layer in (self.f_connected1, self.f_connected2):
            hidden = F.relu(layer(hidden))
        # No activation on the output layer: the result is unnormalised.
        return self.out(hidden)

# Seed PyTorch's random number generator before the layers are created, so the
# random weight initialisation (and hence the training run) is repeatable on
# the same machine and PyTorch version.
torch.manual_seed(0)

# Create the model with its default layer sizes.
model = SimpleNN()

# Adam optimizer over all model parameters, plus cross-entropy loss.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_function = nn.CrossEntropyLoss()

In [None]:
# Download the dataset archive from Kaggle into the working directory.
!curl -L -o archive.zip https://www.kaggle.com/api/v1/datasets/download/kumargh/pimaindiansdiabetescsv
# -o overwrites already-extracted files without prompting, so the cell can be
# re-run (or executed via "Run All") without waiting for keyboard input.
!unzip -o archive.zip
# List the working directory to confirm the CSV was extracted.
!ls -ilah

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100  9103  100  9103    0     0  16023      0 --:--:-- --:--:-- --:--:-- 36705
Archive:  archive.zip
replace pima-indians-diabetes.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
  inflating: pima-indians-diabetes.csv  
total 52K
4849687 drwxr-xr-x 1 root root 4.0K Oct 25 23:02 .
6029362 drwxr-xr-x 1 root root 4.0K Oct 25 22:42 ..
6035639 -rw-r--r-- 1 root root 8.9K Oct 25 23:02 archive.zip
 131091 drwxr-xr-x 4 root root 4.0K Oct 24 13:20 .config
6035640 -rw-r--r-- 1 root root  24K Sep 27  2019 pima-indians-diabetes.csv
4849688 drwxr-xr-x 1 root root 4.0K Oct 24 13:20 sample_data


In [None]:
# Column names applied to the headerless CSV, in file order. The last one,
# 'class', is the label (1: tested positive for diabetes, 0: tested negative).
atributos = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
df = pd.read_csv('pima-indians-diabetes.csv', header=None, names=atributos)
df.head()

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [None]:
# Separate the eight feature columns (X) from the label column (y) as NumPy arrays.
feature_cols = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age']
X = df[feature_cols].values
y = df['class'].values

# Show the first three rows of each as a quick sanity check.
print(X[:3])
print(y[:3])

[[  6.    148.     72.     35.      0.     33.6     0.627  50.   ]
 [  1.     85.     66.     29.      0.     26.6     0.351  31.   ]
 [  8.    183.     64.      0.      0.     23.3     0.672  32.   ]]
[1 0 1]


In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Convert the NumPy arrays to tensors: float32 features and int64 class labels.
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

In [None]:
# Training: one full-batch gradient step per epoch over the whole training set.
# NOTE: `model` and `optimizer` are created in an earlier cell and are not reset
# here, so re-running only this cell continues training from the current weights.
epochs = 500
final_losses = []  # per-epoch training loss, stored as plain Python floats

model.train()
for epoch in range(1, epochs + 1):
    # Call the module itself rather than .forward() so registered hooks run.
    y_pred = model(X_train)
    loss = loss_function(y_pred, y_train)
    # Keep only the scalar value: appending the tensor would leave it attached
    # to the autograd graph, and it could not be plotted or converted to NumPy
    # without detaching it first.
    final_losses.append(loss.item())
    if epoch % 100 == 1:
        print("Epoch number: {} and the loss : {}".format(epoch, loss.item()))
    optimizer.zero_grad()  # clear gradients left over from the previous step
    loss.backward()        # backpropagation
    optimizer.step()       # update the weights

# Predictions on the held-out test set, computed in one batched forward pass
# instead of a Python loop over individual rows.
# eval() changes nothing for this architecture (it has no dropout or batch-norm
# layers) but keeps the cell correct if such layers are added later.
model.eval()
with torch.no_grad():
    # argmax over the class dimension gives the predicted label for each row.
    predictions = model(X_test).argmax(dim=1).tolist()

accuracy = accuracy_score(y_test, predictions)
print('\nAcurácia: ', accuracy)

cm = confusion_matrix(y_test, predictions)
print('\nMatriz de Confusão')
print(cm)

Epoch number: 1 and the loss : 0.3406488299369812
Epoch number: 101 and the loss : 0.328434020280838
Epoch number: 201 and the loss : 0.30803611874580383
Epoch number: 301 and the loss : 0.308881938457489
Epoch number: 401 and the loss : 0.29183271527290344

Acurácia:  0.7207792207792207

Matriz de Confusão
[[83 24]
 [19 28]]


![image](https://github.com/vladimiralencar/Projects-2024/blob/main/pytorch/pytorch-v02.png?raw=true)

# Código Completo

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# Simple feed-forward neural network
class SimpleNN(nn.Module):
    """Fully connected classifier with two ReLU-activated hidden layers.

    Args:
        input_features: size of each input vector.
        hidden1: width of the first hidden layer.
        hidden2: width of the second hidden layer.
        out_features: number of values produced by the output layer.
    """

    def __init__(self, input_features=8, hidden1=20, hidden2=10, out_features=2):
        super().__init__()
        # Layers are registered in input -> output order.
        self.f_connected1 = nn.Linear(input_features, hidden1)
        self.f_connected2 = nn.Linear(hidden1, hidden2)
        self.out = nn.Linear(hidden2, out_features)

    def forward(self, x):
        """Run ``x`` through both hidden layers and return the output layer's raw values."""
        hidden = x
        for layer in (self.f_connected1, self.f_connected2):
            hidden = F.relu(layer(hidden))
        # No activation on the output layer: the result is unnormalised.
        return self.out(hidden)

# Seed PyTorch's random number generator before the layers are created, so the
# random weight initialisation (and hence the training run) is repeatable on
# the same machine and PyTorch version.
torch.manual_seed(0)

# Create the model with its default layer sizes.
model = SimpleNN()

# Adam optimizer over all model parameters, plus cross-entropy loss.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_function = nn.CrossEntropyLoss()

# Read the headerless CSV straight from GitHub, naming the columns in file order.
atributos = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
url = 'https://raw.githubusercontent.com/vladimiralencar/Projects-2024/refs/heads/main/pytorch/pima-indians-diabetes.csv'
df = pd.read_csv(url, header=None, names=atributos)

# Separate the eight feature columns (X) from the label column (y) as NumPy arrays.
feature_cols = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age']
X = df[feature_cols].values
y = df['class'].values

# Split para treino e teste
from sklearn.model_selection import train_test_split
# Fraction of rows held out for testing; the fixed random_state makes the split repeatable.
test_size = 0.15
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size, random_state=0
)

# Convert the NumPy arrays to tensors: float32 features and int64 class labels.
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

# Training: one full-batch gradient step per epoch over the whole training set.
epochs = 500
final_losses = []  # per-epoch training loss, stored as plain Python floats

model.train()
for epoch in range(1, epochs + 1):
    # Call the module itself rather than .forward() so registered hooks run.
    y_pred = model(X_train)
    loss = loss_function(y_pred, y_train)
    # Keep only the scalar value: appending the tensor would leave it attached
    # to the autograd graph, and it could not be plotted or converted to NumPy
    # without detaching it first.
    final_losses.append(loss.item())
    if epoch % 100 == 1:
        print("Epoch number: {} and the loss : {}".format(epoch, loss.item()))
    optimizer.zero_grad()  # clear gradients left over from the previous step
    loss.backward()        # backpropagation
    optimizer.step()       # update the weights

# Predictions on the held-out test set, computed in one batched forward pass
# instead of a Python loop over individual rows.
# eval() changes nothing for this architecture (it has no dropout or batch-norm
# layers) but keeps the code correct if such layers are added later.
model.eval()
with torch.no_grad():
    # argmax over the class dimension gives the predicted label for each row.
    predictions = model(X_test).argmax(dim=1).tolist()

accuracy = accuracy_score(y_test, predictions)
print('\nAcurácia: ', accuracy)
cm = confusion_matrix(y_test, predictions)
print('\nMatriz de Confusão')
print(cm)

Epoch number: 1 and the loss : 2.7328174114227295
Epoch number: 101 and the loss : 0.519047737121582
Epoch number: 201 and the loss : 0.4511554539203644
Epoch number: 301 and the loss : 0.4228440821170807
Epoch number: 401 and the loss : 0.3946801722049713

Acurácia:  0.8017241379310345

Matriz de Confusão
[[65 13]
 [10 28]]
