In [1]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

In [2]:
# Read the diabetes table. Every column except the last is used as a feature;
# the 'Outcome' column is the label.
# NOTE(review): this assumes 'Outcome' is the last column — otherwise the label
# would also be among the features; confirm against the CSV header.
df = pd.read_csv('pyto/chap09/data/diabetes.csv')
X = df[df.columns[:-1]].values
# Keep the labels as a NumPy array: scikit-learn works on arrays, and the
# conversion to torch tensors happens when the datasets are built.
y = df['Outcome'].values

# A fixed random_state makes the 67/33 split (and therefore the reported
# metrics) reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

In [5]:
ms = MinMaxScaler()
ss = StandardScaler()

# Fit the scalers on the training split only and reuse the learned statistics
# for the test split. Re-fitting on the test split would scale it with its own
# mean/std (leaking test information and making it inconsistent with training).
# NOTE: this cell overwrites its inputs, so re-running it without re-running
# the split cell rescales already-scaled data.
X_train = ss.fit_transform(X_train)
X_test = ss.transform(X_test)

# The scalers expect 2-D input, so the labels become a single column.
y_train = ms.fit_transform(y_train.reshape(-1, 1))
y_test = ms.transform(y_test.reshape(-1, 1))

In [6]:
class customdataset(Dataset):
    """Dataset that pairs each row of ``X`` with the matching entry of ``y``."""

    def __init__(self, X, y):
        """Store the features and labels and cache the number of samples."""
        self.X, self.y = X, y
        self.len = len(X)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        features = self.X[index]
        label = self.y[index]
        return features, label

    def __len__(self):
        """Return the sample count cached at construction time."""
        return self.len

In [7]:
# Wrap the scaled arrays as float32 tensors inside the dataset class.
train_data = customdataset(
    torch.FloatTensor(X_train),
    torch.FloatTensor(y_train),
)
test_data = customdataset(
    torch.FloatTensor(X_test),
    torch.FloatTensor(y_test),
)

# Mini-batches of 64; only the training batches are shuffled.
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

In [14]:
class binaryClassification(nn.Module):
    """Two-hidden-layer MLP that emits one raw logit per sample.

    The output is NOT passed through a sigmoid; pair the model with a
    logits-based loss such as ``nn.BCEWithLogitsLoss``.

    Args:
        in_features: number of input features per sample (default 8).
        hidden_size: width of both hidden layers (default 64).
        dropout: dropout probability applied before the output layer
            (default 0.1).
    """

    def __init__(self, in_features=8, hidden_size=64, dropout=0.1):
        super(binaryClassification, self).__init__()
        self.layer_1 = nn.Linear(in_features, hidden_size, bias=True)
        self.layer_2 = nn.Linear(hidden_size, hidden_size, bias=True)
        self.layer_out = nn.Linear(hidden_size, 1, bias=True)
        # Each helper module is created exactly once (they were previously
        # instantiated twice, the second copy silently replacing the first).
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout)
        self.batchnorm1 = nn.BatchNorm1d(hidden_size)
        self.batchnorm2 = nn.BatchNorm1d(hidden_size)

    def forward(self, inputs):
        """Map ``inputs`` of shape (batch, in_features) to logits of shape (batch, 1)."""
        x = self.relu(self.layer_1(inputs))
        x = self.batchnorm1(x)
        x = self.relu(self.layer_2(x))
        x = self.batchnorm2(x)
        x = self.dropout(x)
        x = self.layer_out(x)
        return x

In [19]:
# Training schedule: 1001 epochs so that epoch 1000 itself is reached and
# reported, with a progress line every 100 epochs.
epochs = 1000 + 1
print_epoch = 100
LEARNING_RATE = 1e-4

# Build the network on the selected device and show its layer layout.
model = binaryClassification().to(device)
print(model)

# The network outputs raw logits, so the loss applies the sigmoid itself.
BCE = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=LEARNING_RATE)

binaryClassification(
  (layer_1): Linear(in_features=8, out_features=64, bias=True)
  (layer_2): Linear(in_features=64, out_features=64, bias=True)
  (layer_out): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.1, inplace=False)
  (batchnorm1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


In [20]:
def accuracy(y_pred, y_test):
    """Return the share of correct binary predictions as a whole-number percentage.

    Args:
        y_pred: tensor of raw logits; a sigmoid followed by rounding turns
            them into 0/1 predictions.
        y_test: tensor of 0/1 labels holding the same number of elements as
            ``y_pred``; its shape may differ (e.g. ``(N,)`` vs ``(N, 1)``).

    Returns:
        A 0-dim float tensor with the accuracy in percent, rounded to the
        nearest integer.
    """
    y_pred_tag = torch.round(torch.sigmoid(y_pred))
    # Align the labels with the predictions before comparing. Without this an
    # (N, 1) prediction compared against (N,) labels broadcasts to an (N, N)
    # matrix and the "correct" count becomes meaningless.
    targets = y_test.reshape(y_pred_tag.shape)
    correct_results_sum = (y_pred_tag == targets).sum().float()
    acc = correct_results_sum / targets.numel()
    acc = torch.round(acc * 100)
    return acc

In [21]:
# Re-derive the device so this cell does not depend on an earlier definition.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

for epoch in range(epochs):
    # ---- training pass -------------------------------------------------
    iteration_loss = 0.
    iteration_accuracy = 0.

    model.train()
    for i, data in enumerate(train_loader):
        X, y = data

        X = X.to(device)
        y = y.to(device)
        target = y.reshape(-1, 1).float()

        y_pred = model(X.float())

        loss = BCE(y_pred, target)

        # Accumulate plain Python floats. Adding the loss tensor itself would
        # keep every batch's autograd graph alive until the epoch ends.
        iteration_loss += loss.item()
        iteration_accuracy += float(accuracy(y_pred, target))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Metrics are the mean over batches (i is the last batch index).
    if(epoch % print_epoch == 0):
        print('Train: epoch: {0} - loss: {1:.5f}; acc: {2:.3f}'.format(epoch, iteration_loss/(i+1), iteration_accuracy/(i+1)))

    # ---- evaluation pass -----------------------------------------------
    iteration_loss = 0.
    iteration_accuracy = 0.
    model.eval()
    # No gradients are needed for evaluation; disabling them avoids building
    # autograd graphs for the test batches.
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            X, y = data

            X = X.to(device)
            y = y.to(device)
            target = y.reshape(-1, 1).float()

            y_pred = model(X.float())
            loss = BCE(y_pred, target)
            iteration_loss += loss.item()
            iteration_accuracy += float(accuracy(y_pred, target))

    if(epoch % print_epoch == 0):
        print('Test: epoch: {0} - loss: {1:.5f}; acc: {2:.3f}'.format(epoch,
              iteration_loss/(i+1), iteration_accuracy/(i+1)))

Train: epoch: 0 - loss: 0.88844; acc: 35.889
Test: epoch: 0 - loss: 0.71080; acc: 42.000
Train: epoch: 100 - loss: 0.72258; acc: 48.333
Test: epoch: 100 - loss: 0.68717; acc: 56.000
Train: epoch: 200 - loss: 0.64661; acc: 63.000
Test: epoch: 200 - loss: 0.63301; acc: 63.750
Train: epoch: 300 - loss: 0.60566; acc: 69.889
Test: epoch: 300 - loss: 0.57566; acc: 72.000
Train: epoch: 400 - loss: 0.57935; acc: 69.667
Test: epoch: 400 - loss: 0.58182; acc: 67.500
Train: epoch: 500 - loss: 0.56236; acc: 71.667
Test: epoch: 500 - loss: 0.58281; acc: 70.000
Train: epoch: 600 - loss: 0.56554; acc: 71.667
Test: epoch: 600 - loss: 0.56014; acc: 69.000
Train: epoch: 700 - loss: 0.56592; acc: 73.000
Test: epoch: 700 - loss: 0.55722; acc: 70.500
Train: epoch: 800 - loss: 0.54052; acc: 73.222
Test: epoch: 800 - loss: 0.55749; acc: 71.750
Train: epoch: 900 - loss: 0.48593; acc: 80.222
Test: epoch: 900 - loss: 0.55104; acc: 70.250
Train: epoch: 1000 - loss: 0.51459; acc: 73.667
Test: epoch: 1000 - loss: 