In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split

In [2]:
# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [3]:
# Load the CSV into a DataFrame, then preview ten randomly chosen rows.
df = pd.read_csv(filepath_or_buffer="/content/fmnist_small.csv")
df.sample(n=10)

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
5722,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4967,0,0,0,0,0,0,0,0,0,0,...,50,50,77,20,0,2,0,0,0,0
285,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5889,6,0,0,0,0,0,0,0,0,0,...,29,0,0,0,0,0,0,0,0,0
5046,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3263,4,0,0,0,0,0,0,0,0,0,...,53,0,0,55,174,149,8,0,0,0
677,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4550,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1181,9,0,0,0,0,0,0,0,0,0,...,0,0,55,23,24,20,3,0,0,0
2867,3,0,0,0,0,0,0,0,0,0,...,137,110,12,0,0,0,0,0,0,0


In [4]:
# (rows, columns) of the loaded frame.
df.shape

(6000, 785)

Train-Test-Split

In [5]:
# Column 0 becomes the target array; every remaining column becomes a feature.
y = df.iloc[:, 0].values
x = df.iloc[:, 1:].values

In [6]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=15,
)

In [7]:
# Rescale both splits with the same divisor (255.0).
# NOTE: the names are rebound to the scaled arrays, so re-running this cell
# divides by 255 a second time; re-run the split cell first if you need to redo it.
x_train, x_test = x_train / 255.0, x_test / 255.0

In [8]:
class customdset(Dataset):
  """In-memory dataset pairing 1x28x28 float images with integer class labels."""

  def __init__(self, features, labels):
    """Convert the inputs to tensors once, up front.

    features: array-like that can be regrouped into 28*28 = 784 values per
      sample; stored as float32 with shape (N, 1, 28, 28).
    labels: array-like of class indices, one per sample; stored as int64.
    """
    pixels = torch.tensor(features, dtype = torch.float32)
    self.features = pixels.reshape(-1, 1, 28, 28)  # (num_samples, channels, height, width)
    self.labels = torch.tensor(labels, dtype = torch.long)

  def __len__(self):
    """Number of samples, i.e. the size of the leading dimension of the features."""
    return self.features.shape[0]

  def __getitem__(self, idx):
    """Return the (image, label) pair stored at position idx."""
    image, target = self.features[idx], self.labels[idx]
    return image, target

Dataset & DataLoaders

In [9]:
# Wrap each split in the tensor-backed dataset class.
train_dataset = customdset(features = x_train, labels = y_train)
test_dataset = customdset(features = x_test, labels = y_test)

In [10]:
# Mini-batches of 32. Only the training loader shuffles: a fresh order each epoch
# helps optimisation, whereas evaluation gains nothing from a random order and is
# easier to inspect when batches come out in a fixed sequence.
# pin_memory=True places batches in page-locked host memory for faster copies to the GPU.
train_loader = DataLoader(train_dataset, batch_size = 32, shuffle = True, pin_memory= True)
test_loader = DataLoader(test_dataset, batch_size = 32, shuffle = False, pin_memory= True)

Neural Network

In [18]:
class mynn(nn.Module):
  """Small CNN classifier for 28x28 images.

  Two convolutional blocks (Conv -> ReLU -> BatchNorm -> 2x2 MaxPool) are
  followed by a three-layer fully connected head that returns raw class logits.
  """

  def __init__(self, input_features, num_classes = 10):
    """Build the layers.

    input_features: number of channels in the input image.
    num_classes: length of the output logit vector. Defaults to 10, the value
      that was previously hard-coded, so existing `mynn(1)` calls are unchanged.
    """
    super().__init__()

    # 'same' padding keeps the spatial size through each conv; each pool halves it.
    self.features = nn.Sequential(
        nn.Conv2d(input_features, 32, kernel_size = 3, padding = 'same'),
        nn.ReLU(),
        nn.BatchNorm2d(32),
        nn.MaxPool2d(kernel_size = 2, stride = 2),

        nn.Conv2d(32, 64, kernel_size = 3, padding = 'same'),
        nn.ReLU(),
        nn.BatchNorm2d(64),
        nn.MaxPool2d(kernel_size = 2, stride = 2)
    )

    self.classifier = nn.Sequential(
        nn.Flatten(),
        # 64 channels * 7 * 7: assumes a 28x28 input (28 -> 14 -> 7 after two pools).
        nn.Linear(64*7*7, 128),
        nn.ReLU(),
        nn.Dropout(p = 0.3),

        nn.Linear(128, 64),
        nn.ReLU(),

        nn.Linear(64, num_classes)

    )

  def forward(self, x):
    """Map a batch of shape (N, input_features, 28, 28) to logits of shape (N, num_classes)."""

    x = self.features(x)
    x = self.classifier(x)

    return x

Important parameters

In [19]:
# Optimiser step size and the number of full passes over the training set.
learning_rate, epochs = 1e-2, 100

Define Model

In [20]:
# Seed PyTorch's global RNG before anything random happens, so that weight
# initialisation, DataLoader shuffling and dropout masks repeat across
# "Restart & Run All" (GPU kernels may still cause tiny run-to-run differences).
# 15 mirrors the random_state used for the train/test split.
torch.manual_seed(15)

model = mynn(1)  # 1 input channel
model.to(device)

criterion = nn.CrossEntropyLoss()  # takes raw logits and integer class labels
opt = optim.Adam(model.parameters(), lr = learning_rate, weight_decay = 1e-4)

Training Loop

In [21]:
for epoch in range(epochs):
  # Re-enter training mode every epoch: if an evaluation cell (model.eval()) was
  # run earlier in this kernel, dropout and batch norm would otherwise stay in
  # inference behaviour for the whole run.
  model.train()
  total_epoch_loss = 0

  for batch_features, batch_labels in train_loader:

    batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

    # forward pass
    outputs = model(batch_features)

    # loss calculation
    loss = criterion(outputs, batch_labels)

    # backward pass: clear stale gradients, then backpropagate
    opt.zero_grad()
    loss.backward()

    # update the model parameters
    opt.step()

    total_epoch_loss += loss.item()

  # mean of the per-batch losses for this epoch
  avg_loss = total_epoch_loss/len(train_loader)
  print(f"Epoch: {epoch}, Loss: {avg_loss}")

Epoch: 0, Loss: 1.0619207076231638
Epoch: 1, Loss: 0.6934535783529282
Epoch: 2, Loss: 0.5952972970406214
Epoch: 3, Loss: 0.5657862187425295
Epoch: 4, Loss: 0.48897886261343954
Epoch: 5, Loss: 0.44245217690865196
Epoch: 6, Loss: 0.44100736757119496
Epoch: 7, Loss: 0.4379898718992869
Epoch: 8, Loss: 0.4091191459695498
Epoch: 9, Loss: 0.38593021864692373
Epoch: 10, Loss: 0.40339642321070035
Epoch: 11, Loss: 0.36644943957527476
Epoch: 12, Loss: 0.3296275047461192
Epoch: 13, Loss: 0.3317082443336646
Epoch: 14, Loss: 0.340682531495889
Epoch: 15, Loss: 0.3145220379531384
Epoch: 16, Loss: 0.32934506331880886
Epoch: 17, Loss: 0.3279283151775598
Epoch: 18, Loss: 0.3306216437369585
Epoch: 19, Loss: 0.31418216849366826
Epoch: 20, Loss: 0.32787731212874255
Epoch: 21, Loss: 0.2908753453443448
Epoch: 22, Loss: 0.32225187261899313
Epoch: 23, Loss: 0.28683160622914633
Epoch: 24, Loss: 0.28659538065393764
Epoch: 25, Loss: 0.3115251868714889
Epoch: 26, Loss: 0.33039538433154425
Epoch: 27, Loss: 0.3234183

Model Evaluation

In [22]:
# Put the model in evaluation mode (dropout off, batch norm uses its running statistics).
model.eval()

mynn(
  (features): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): ReLU()
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (5): ReLU()
    (6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=3136, out_features=128, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): ReLU()
    (6): Linear(in_features=64, out_features=10, bias=True)
  )
)

In [23]:
# Accuracy on the held-out test split.
# Evaluation mode is set here rather than relying on an earlier cell having been
# run: dropout must be disabled and batch norm must use its running statistics.
model.eval()

total = 0
correct = 0

with torch.no_grad():  # gradients are not needed for scoring
  for batch_features, batch_labels in test_loader:
    batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

    outputs = model(batch_features)

    # predicted class = index of the largest logit in each row
    predicted = outputs.argmax(dim = 1)

    total += batch_labels.size(0)
    correct += (predicted == batch_labels).sum().item()

print(correct/total)

0.8758333333333334


In [24]:
# Accuracy on the training split, for comparison with the test accuracy.
# Evaluation mode is set here rather than relying on an earlier cell having been
# run: dropout must be disabled and batch norm must use its running statistics.
model.eval()

total = 0
correct = 0

with torch.no_grad():  # gradients are not needed for scoring
  for batch_features, batch_labels in train_loader:
    batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

    outputs = model(batch_features)

    # predicted class = index of the largest logit in each row
    predicted = outputs.argmax(dim = 1)

    total += batch_labels.size(0)
    correct += (predicted == batch_labels).sum().item()

print(correct/total)

0.9695833333333334
