# DATASET & DATALOADER CLASS

In [None]:
import torch
from sklearn.datasets import make_classification

1. Create a Classification dataset using sklearn

In [None]:
# Synthetic 4-class problem: 100 samples with 4 features, all of them
# informative and none redundant. random_state is fixed so the same data
# is generated on every run.
x, y = make_classification(
    n_samples=100,
    n_features=4,
    n_informative=4,
    n_redundant=0,
    n_classes=4,
    random_state=11,
)

In [None]:
# feature matrix: one row per sample, one column per feature
x.shape

(100, 4)

In [None]:
# label vector: one class label per sample
y.shape

(100,)

2. Convert the data into PyTorch tensors

In [None]:
# Features are stored as float32, labels as int64 (torch.long).
x = torch.tensor(x, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)

In [None]:
# x
# y

In [None]:
from torch.utils.data import Dataset, DataLoader

In [None]:
class xydata(Dataset):
  """Map-style dataset pairing each feature row with its label."""

  def __init__(self, features, labels):
    """Keep references to the feature and label containers (no copy is made)."""
    self.labels = labels
    self.features = features

  def __len__(self):
    """Return the number of samples, i.e. the first dimension of the features."""
    n_samples = self.features.shape[0]
    return n_samples

  def __getitem__(self, index):
    """Return the ``(features, label)`` pair stored at ``index``."""
    sample = self.features[index]
    target = self.labels[index]
    return sample, target

In [None]:
# NOTE(review): this rebinds the name `Dataset`, shadowing the
# torch.utils.data.Dataset class imported above. Re-running the
# `class xydata(Dataset)` cell after this one would see this instance
# instead of the base class; a later cell re-imports Dataset.
Dataset = xydata(x, y) # xydata instance wrapping the x / y tensors

In [None]:
len(x) # number of rows (samples) in x; xydata.__len__ reports the same count via features.shape[0]

100

In [None]:
# indexing goes through xydata.__getitem__ and yields (features, label) for sample 3
Dataset[3]

(tensor([ 1.4515,  0.6380, -1.4070, -0.9375]), tensor(1))

In [None]:
# Serve the dataset in shuffled mini-batches of four samples.
dataloader = DataLoader(
    dataset=Dataset,
    shuffle=True,
    batch_size=4,
)

In [None]:
# Walk through the loader once and dump every mini-batch:
# features, then labels, then a dashed separator line.
for batch_features, batch_labels in dataloader:
  print(batch_features, batch_labels, "-" * 20, sep="\n")

tensor([[ 0.4606, -0.8410,  0.3158,  0.6789],
        [-1.4197, -2.3551, -0.3654,  1.5621],
        [ 1.3888,  2.1982, -0.1456, -2.1072],
        [ 0.1582,  0.6447,  2.1544, -0.0522]])
tensor([3, 3, 2, 0])
--------------------
tensor([[ 0.0745,  1.5828,  2.9973,  1.2631],
        [-0.7728, -2.1191, -0.4978, -0.6310],
        [ 1.7991,  0.4301, -0.2611, -0.8543],
        [ 0.2340, -1.5332,  1.1828, -1.1363]])
tensor([3, 1, 2, 2])
--------------------
tensor([[ 0.6396, -2.1824,  0.1601,  1.1940],
        [-3.6407, -1.8682, -2.7861,  2.8220],
        [-0.8981, -2.6257, -0.2809,  1.0585],
        [ 0.2206, -1.4766, -1.2213,  0.0175]])
tensor([3, 0, 3, 2])
--------------------
tensor([[ 1.5859,  1.9150,  0.0412, -1.7312],
        [-0.3448,  1.1229,  0.4435, -0.1629],
        [-1.5369,  0.8697, -2.3498,  0.5394],
        [-0.8524,  0.8297,  0.8796, -1.8445]])
tensor([2, 1, 1, 1])
--------------------
tensor([[ 1.1774, -0.9275,  2.1402, -1.7657],
        [ 0.7926, -0.3141,  0.4090, -1.7723],


- The `DataLoader` splits the data into (shuffled) mini-batches by drawing individual samples from the `Dataset`.

# PyTorch Training Pipeline using Dataset & DataLoader

In [None]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

In [None]:
# Load the heart-disease CSV and preview six randomly sampled rows.
# NOTE(review): hard-coded absolute "/content/..." path — presumably a Colab
# upload location; confirm or adjust before running in another environment.
df = pd.read_csv("/content/Heart_Disease_Prediction.csv")
df.sample(6)

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
266,44,1,2,120,263,0,0,173,0,0.0,1,0,7,Absence
194,48,1,3,124,255,1,0,175,0,0.0,1,2,3,Absence
174,34,1,1,118,182,0,2,174,0,0.0,1,0,3,Absence
261,60,1,4,130,206,0,2,132,1,2.4,2,2,7,Presence
263,49,1,2,130,266,0,0,171,0,0.6,1,0,3,Absence
111,41,1,2,110,235,0,0,153,0,0.0,1,0,3,Absence


In [None]:
# (rows, columns) of the loaded DataFrame
df.shape

(270, 14)

Train-Test-Split

In [None]:
# Separate the target column from the predictor columns.
y = df["Heart Disease"]
x = df.drop(columns=["Heart Disease"])

In [None]:
# Hold out 20% of the rows for testing. A fixed random_state keeps the split
# (and therefore the scaler statistics and the reported metrics) reproducible
# across kernel restarts; without it every run trains on different rows.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

Scaling

In [None]:
# Standardize the features: the scaler is fitted on the training split only,
# and the same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(x_train)
x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)

Label Encoding

In [None]:
# Map the string class labels to integer codes; the encoder learns its
# classes from the training labels and reuses them for the test labels.
enc = LabelEncoder().fit(y_train)
y_train, y_test = enc.transform(y_train), enc.transform(y_test)

Convert NumPy Array to PyTorch Tensor

In [None]:
# Cast every split to float32 and wrap it as a tensor. The labels are cast
# to float as well because they are later compared against the model's
# float outputs by the loss.
x_train_tensor, x_test_tensor, y_train_tensor, y_test_tensor = (
    torch.from_numpy(split.astype(np.float32))
    for split in (x_train, x_test, y_train, y_test)
)

In [None]:
x_train_tensor.shape

torch.Size([216, 13])

In [None]:
y_train_tensor.shape

torch.Size([216])

In [None]:
from torch.utils.data import Dataset, DataLoader

class customdataset(Dataset):
  """Map-style dataset that yields ``(features, label)`` pairs by index."""

  def __init__(self, features, labels):
    """Store the feature and label containers.

    Args:
      features: indexable container with one entry (row) per sample.
      labels: indexable container with one label per sample.

    Raises:
      ValueError: if ``features`` and ``labels`` differ in length. Failing
        here is clearer than an IndexError in the middle of an epoch.
    """
    if len(features) != len(labels):
      raise ValueError(
        f"features and labels must have the same length, "
        f"got {len(features)} and {len(labels)}"
      )
    self.features = features
    self.labels = labels

  def __len__(self):
    """Return the number of samples."""
    return len(self.features)

  def __getitem__(self, index):
    """Return the ``(features, label)`` pair stored at ``index``."""
    return self.features[index], self.labels[index]

In [None]:
# Wrap each split's tensors so a DataLoader can index them sample by sample.
train_dataset = customdataset(features=x_train_tensor, labels=y_train_tensor)
test_dataset = customdataset(features=x_test_tensor, labels=y_test_tensor)

In [None]:
# one training sample as (features, label); the label is a float tensor
train_dataset[10]

(tensor([-1.0548,  0.6633,  0.8352,  0.6074,  1.1168, -0.4247,  0.9525, -0.0476,
          1.3436, -0.9346,  0.6122,  2.3898,  1.1153]),
 tensor(1.))

In [None]:
# Training batches are reshuffled at the start of every epoch.
train_loader = DataLoader(train_dataset, batch_size=15, shuffle=True)

# Evaluation gains nothing from shuffling; a fixed order keeps the batch
# composition (and any batch-wise metric) identical on every run.
test_loader = DataLoader(test_dataset, batch_size=15, shuffle=False)

Defining the Model

In [None]:
import torch.nn as nn

class mynn(nn.Module):
  """Single linear layer followed by a sigmoid (one output per sample)."""

  def __init__(self, num_features):
    """Create the layers.

    Args:
      num_features: size of each input feature vector.
    """
    super().__init__()
    self.linear = nn.Linear(in_features=num_features, out_features=1)
    self.sigmoid = nn.Sigmoid()

  def forward(self, features):
    """Return the sigmoid of the linear projection of ``features``."""
    return self.sigmoid(self.linear(features))


Important Parameters (Hyperparameters)

In [None]:
learning_rate = 0.1  # step size handed to the SGD optimizer
epochs = 25  # number of full passes over train_loader

In [None]:
# Model: one input per feature column of the training tensor.
model = mynn(num_features=x_train_tensor.size(1))

# Loss: binary cross-entropy on the probabilities produced by the model.
loss_fn = nn.BCELoss()

# Optimizer: plain SGD over all model parameters.
opt = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

Training Pipeline

In [None]:
# Mini-batch training loop: one optimizer step per batch, one log line per epoch.
model.train()  # make sure the model is in training mode (an earlier eval() call may have switched it)

for epoch in range(epochs):
  running_loss = 0.0  # sum of per-sample losses seen in this epoch
  samples_seen = 0

  for batch_features, batch_labels in train_loader:

    # forward pass
    y_pred = model(batch_features)

    # loss — labels are unsqueezed to (batch, 1) to match the model output
    loss = loss_fn(y_pred, batch_labels.unsqueeze(1))

    # clear gradient
    opt.zero_grad()

    # backward pass
    loss.backward()

    # parameter update
    opt.step()

    # BCELoss averages over the batch, so weight by batch size: the last
    # batch may be smaller than the others.
    batch_size = batch_labels.size(0)
    running_loss += loss.item() * batch_size
    samples_seen += batch_size

  # Print the mean loss once per epoch (not once per batch)
  print(f'Epoch: {epoch}, Loss: {running_loss / max(samples_seen, 1)}')

Epoch: 0, Loss: 0.729789137840271
Epoch: 0, Loss: 0.772480845451355
Epoch: 0, Loss: 0.7617367506027222
Epoch: 0, Loss: 0.6550451517105103
Epoch: 0, Loss: 0.7586795687675476
Epoch: 0, Loss: 0.5600350499153137
Epoch: 0, Loss: 0.6834023594856262
Epoch: 0, Loss: 0.5775937438011169
Epoch: 0, Loss: 0.5858953595161438
Epoch: 0, Loss: 0.46546879410743713
Epoch: 0, Loss: 0.5115152597427368
Epoch: 0, Loss: 0.46862685680389404
Epoch: 0, Loss: 0.5264489054679871
Epoch: 0, Loss: 0.5495933890342712
Epoch: 0, Loss: 0.32500454783439636
Epoch: 1, Loss: 0.6131629347801208
Epoch: 1, Loss: 0.5578600168228149
Epoch: 1, Loss: 0.4184809923171997
Epoch: 1, Loss: 0.5059427618980408
Epoch: 1, Loss: 0.36040613055229187
Epoch: 1, Loss: 0.48853635787963867
Epoch: 1, Loss: 0.3122863173484802
Epoch: 1, Loss: 0.39130327105522156
Epoch: 1, Loss: 0.46883752942085266
Epoch: 1, Loss: 0.5379672646522522
Epoch: 1, Loss: 0.4201732277870178
Epoch: 1, Loss: 0.38315877318382263
Epoch: 1, Loss: 0.25180989503860474
Epoch: 1, Los

Evaluation

In [None]:
# model evaluation using test_loader
model.eval() # set model to evaluation mode

# NOTE(review): 0.8 is stricter than the usual 0.5 cut-off for a sigmoid
# output — kept as in the original; confirm this is intentional.
threshold = 0.8

accuracy_list = []  # per-batch accuracies, kept for inspection
n_correct = 0
n_total = 0

with torch.no_grad():
  for batch_features, batch_labels in test_loader:

    # forward pass, then convert probabilities to binary predictions
    y_prob = model(batch_features)
    y_pred = (y_prob > threshold).float()

    # element-wise hits for this batch
    matches = (y_pred.view(-1) == batch_labels)
    accuracy_list.append(matches.float().mean())

    # Count hits instead of averaging batch means: the last batch can be
    # smaller, and a mean of means would over-weight its samples.
    n_correct += int(matches.sum())
    n_total += matches.numel()

# calculate overall accuracy over all test samples
overall_accuracy = n_correct / n_total if n_total else float("nan")
print(f'Overall Accuracy: {overall_accuracy:.4f}')

Overall Accuracy: 0.861111
