In [13]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [14]:
df = pd.read_csv('../torch/chap09/data/diabetes.csv')

In [15]:
X = df[df.columns[:-1]]
y = df['Outcome']

X = X.values
y = torch.tensor(y.values)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

In [16]:
ms = MinMaxScaler()
ss = StandardScaler()

X_train = ss.fit_transform(X_train)
X_test = ss.fit_transform(X_test)
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)
y_train = ms.fit_transform(y_train)
y_test = ms.fit_transform(y_test)

In [17]:
class customdataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y
        self.len = len(self.X)
    def __getitem__(self, index):
        return self.X[index], self.y[index]
    def __len__(self):
        return self.len

In [18]:
train_data = customdataset(torch.FloatTensor(X_train), torch.FloatTensor(y_train))
test_data = customdataset(torch.FloatTensor(X_test), torch.FloatTensor(y_test))

train_loader = DataLoader(dataset=train_data, batch_size = 64, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size = 64, shuffle=False)

In [19]:
class binaryClassification(nn.Module):
    def __init__(self):
        super(binaryClassification, self).__init__()
        self.layer_1 = nn.Linear(8, 64, bias=True)
        self.layer_2 = nn.Linear(64, 64, bias=True)
        self.layer_out = nn.Linear(64, 1, bias=True)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.1)
        self.batchnorm1 = nn.BatchNorm1d(64)
        self.batchnorm2 = nn.BatchNorm1d(64)

    def forward(self, inputs):
        x = self.relu(self.layer_1(inputs))
        x = self.batchnorm1(x)
        x = self.relu(self.layer_2(x))
        x = self.batchnorm2(x)
        x = self.dropout(x)
        x = self.layer_out(x)
        return x

In [20]:
epochs = 1000 + 1
print_epoch = 100
LEARNING_RATE = 1e-2

model = binaryClassification()
model.to(device)
print(model)
BCE = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)

binaryClassification(
  (layer_1): Linear(in_features=8, out_features=64, bias=True)
  (layer_2): Linear(in_features=64, out_features=64, bias=True)
  (layer_out): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.1, inplace=False)
  (batchnorm1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


In [21]:
def accuracy(y_pred, y_test):
    y_pred_tag= torch.round(torch.sigmoid(y_pred))
    correct_results_sum = (y_pred_tag == y_test).sum().float()
    acc = correct_results_sum/y_test.shape[0]
    acc = torch.round(acc * 100)
    return acc

In [22]:
for epoch in range(epochs):
    iteration_loss = 0.
    iteration_accuracy = 0.
    model.train()
    for i, data in enumerate(train_loader):
        X, y = data
        
        X = X.to(device)
        y = y.to(device)
        
        y_pred = model(X.float())
        loss = BCE(y_pred, y.reshape(-1, 1).float())

        iteration_loss += loss
        iteration_accuracy += accuracy(y_pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if(epoch % print_epoch == 0):
        print('Train: epoch: {0} - loss: {1:.5f}; acc: {2:.3f}'.format(epoch, iteration_loss/(i+1), iteration_accuracy/(i+1)))

    iteration_loss = 0.
    iteration_accuracy = 0.
    model.eval()
    for i, data in enumerate(test_loader):
        X, y = data
        X = X.to(device)
        y = y.to(device)
        y_pred = model(X.float())
        loss = BCE(y_pred, y.reshape(-1, 1).float())
        iteration_loss += loss
        iteration_accuracy += accuracy(y_pred, y)
    if(epoch % print_epoch == 0):
        print('Test: epoch: {0} - loss: {1:.5f}; acc: {2:.3f}'.format(epoch, iteration_loss/(i+1), iteration_accuracy/(i+1)))

Train: epoch: 0 - loss: 0.68259; acc: 58.556
Test: epoch: 0 - loss: 0.67094; acc: 67.750
Train: epoch: 100 - loss: 0.40283; acc: 83.333
Test: epoch: 100 - loss: 0.51214; acc: 75.250
Train: epoch: 200 - loss: 0.49786; acc: 72.333
Test: epoch: 200 - loss: 0.49303; acc: 77.000
Train: epoch: 300 - loss: 0.38072; acc: 84.111
Test: epoch: 300 - loss: 0.53525; acc: 74.000
Train: epoch: 400 - loss: 0.49180; acc: 74.000
Test: epoch: 400 - loss: 0.53553; acc: 75.250
Train: epoch: 500 - loss: 0.48128; acc: 78.556
Test: epoch: 500 - loss: 0.54473; acc: 75.500
Train: epoch: 600 - loss: 0.39499; acc: 84.000
Test: epoch: 600 - loss: 0.54120; acc: 76.250
Train: epoch: 700 - loss: 0.37308; acc: 83.667
Test: epoch: 700 - loss: 0.51778; acc: 79.000
Train: epoch: 800 - loss: 0.42798; acc: 78.222
Test: epoch: 800 - loss: 0.58241; acc: 73.750
Train: epoch: 900 - loss: 0.43160; acc: 79.111
Test: epoch: 900 - loss: 0.58520; acc: 74.750
Train: epoch: 1000 - loss: 0.33810; acc: 85.778
Test: epoch: 1000 - loss: 