In [1]:
!nvidia-smi

Tue Apr 30 19:28:18 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.76                 Driver Version: 550.76         CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 2070 ...    Off |   00000000:0A:00.0  On |                  N/A |
|  0%   49C    P5             24W /  215W |     500MiB /   8192MiB |     28%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor
from torchvision import transforms
from torchvision import datasets

In [3]:
if torch.cuda.is_available():
    device = torch.device(type='cuda', index=0)
else:
    device = torch.device(type='cpu', index=0)
    
print(device)

cuda:0


In [4]:
trainSet = datasets.MNIST("", train=True, download=True, transform=transforms.Compose([ToTensor()]))
testSet = datasets.MNIST("", train=False, download=True, transform=transforms.Compose([ToTensor()]))

print(trainSet.data.ndim)
print(trainSet.data.shape)

3
torch.Size([60000, 28, 28])


In [5]:
batch_size = 64

trainDl = DataLoader(trainSet, batch_size, True)
testDl = DataLoader(testSet, batch_size)

In [6]:
class CNN(nn.Module):
    def __init__(self) -> None:
        super(CNN, self).__init__()
        self.relu = nn. ReLU()
        self.flatten = nn.Flatten()
        
        self.c1 = nn.Conv2d(1, 8, [3,3], 1, 0)
        self.bn1 = nn.BatchNorm2d(8)
        self.mpool1 = nn.MaxPool2d([2,2], 2, 0)
        
        self.c2 = nn.Conv2d(8, 16, [3,3], 1, 0)
        self.bn2 = nn.BatchNorm2d(16)
        
        self.c3 = nn.Conv2d(16, 32, [3,3], 1, 0)
        self.bn3 = nn.BatchNorm2d(32)
        
        self.f1 = nn.Linear(2592, 50)
        self.bn4 = nn.BatchNorm1d(50)
        self.f2 = nn.Linear(50, 10)
        self.bn5 = nn.BatchNorm1d(10)
        
        self.smax = nn.Softmax(dim=-1)
        
    def forward(self, x):
        x = self.c1(x)
        x = self.bn1(x)
        x = self.relu(x)
        
        x = self.mpool1(x)
        
        x = self.c2(x)
        x = self.bn2(x)
        x = self.relu(x)
        
        x = self.c3(x)
        x = self.bn3(x)
        x = self.relu(x)
        
        x = self.flatten(x)
        
        x = self.f1(x)
        x = self.bn4(x)
        x = self.relu(x)
        
        x = self.f2(x)
        x = self.bn5(x)
        
        return self.smax(x)
        

In [7]:
def train_one_epoch(model, dataloader, loss_fn, opt):
    model.train()
    track_loss = 0
    num_correct = 0
    
    for i, (imgs, label) in enumerate(dataloader):
        imgs = imgs.to(device)
        label = label.to(device)
        
        pred = model(imgs)
        loss = loss_fn(pred, label)
        track_loss+=loss.item()
        num_correct+=(torch.argmax(pred, dim=1) == label).type(torch.float).sum().item()
        
        loss.backward()
        opt.step()
        opt.zero_grad()
        
        if i%100==0:
            runnin_loss = round(track_loss/(i+1),2)
            runnin_acc = round((num_correct/((i+1)*batch_size))*100, 2)
            print(f"Batch: {i+1}, Run_acc: {runnin_acc}, Run_loss:{runnin_loss}")
            
        
    epoch_loss = track_loss/len(dataloader)
    epoch_acc = (num_correct/len(dataloader.dataset))*100
    return round(epoch_acc,2), round(epoch_loss,2)


def eval_one(dataloader, model, loss_fn):
    model.eval()
    track_loss = 0
    num_correct = 0
    
    with torch.no_grad():
        for i, (img, label) in enumerate(dataloader):
            img = img.to(device)
            label = label.to(device)
            
            pred = model(img)
            loss = loss_fn(pred, label)
            track_loss+=loss.item()
            num_correct+=(torch.argmax(pred, dim=1) == label).type(torch.float).sum().item()
            
            if i%100==0:
                running_loss = round(track_loss/(i+1),2)
                running_acc = round((num_correct/((i+1)*batch_size))*100,2)
                print(f"Batch: {i+1}/{len(dataloader)}, Running Loss: {running_loss}, Running Accuracy: {running_acc}")
                
    epoch_loss = track_loss/len(dataloader)#slightly inaccurate
    epoch_acc = (num_correct/len(dataloader.dataset))*100
    return round(epoch_loss, 2), round(epoch_acc, 2)

In [8]:
model = CNN()
model = model.to(device)
print(model)

CNN(
  (relu): ReLU()
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (c1): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1))
  (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (mpool1): MaxPool2d(kernel_size=[2, 2], stride=2, padding=0, dilation=1, ceil_mode=False)
  (c2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1))
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (c3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (f1): Linear(in_features=2592, out_features=50, bias=True)
  (bn4): BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (f2): Linear(in_features=50, out_features=10, bias=True)
  (bn5): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (smax): Softmax(dim=-1)
)


In [9]:
loss_fn = nn.CrossEntropyLoss()

lr= 0.001
opt = optim.Adam(params=model.parameters(), lr=lr)

EPOCHS = 20

In [10]:
for i in range(EPOCHS):
    print(f'-x-x-x-x-x- EPOCH NUMBER: {i} -x-x-x-x-x-x-')
    print('TRAIN')
    ep_a , ep_l = train_one_epoch(model, trainDl, loss_fn, opt)
    print(f"EPOCH({i+1}): Loss:{ep_l}, Acc:{ep_a}")
    print("VALIDATING")
    ep_l , ep_a = eval_one(testDl, model,loss_fn)
    print(f"EPOCH({i+1}): Loss:{ep_l}, Acc:{ep_a}")

-x-x-x-x-x- EPOCH NUMBER: 0 -x-x-x-x-x-x-
TRAIN
Batch: 1, Run_acc: 6.25, Run_loss:2.32
Batch: 101, Run_acc: 88.44, Run_loss:1.82
Batch: 201, Run_acc: 92.19, Run_loss:1.77
Batch: 301, Run_acc: 93.74, Run_loss:1.73
Batch: 401, Run_acc: 94.73, Run_loss:1.71
Batch: 501, Run_acc: 95.39, Run_loss:1.69
Batch: 601, Run_acc: 95.81, Run_loss:1.67
Batch: 701, Run_acc: 96.13, Run_loss:1.66
Batch: 801, Run_acc: 96.36, Run_loss:1.65
Batch: 901, Run_acc: 96.6, Run_loss:1.64
EPOCH(1): Loss:1.63, Acc:96.66
VALIDATING
Batch: 1/157, Running Loss: 1.53, Running Accuracy: 98.44
Batch: 101/157, Running Loss: 1.53, Running Accuracy: 98.69
EPOCH(1): Loss:1.53, Acc:98.92
-x-x-x-x-x- EPOCH NUMBER: 1 -x-x-x-x-x-x-
TRAIN
Batch: 1, Run_acc: 96.88, Run_loss:1.57
Batch: 101, Run_acc: 98.51, Run_loss:1.54
Batch: 201, Run_acc: 98.59, Run_loss:1.54
Batch: 301, Run_acc: 98.63, Run_loss:1.53
Batch: 401, Run_acc: 98.59, Run_loss:1.53
Batch: 501, Run_acc: 98.66, Run_loss:1.53
Batch: 601, Run_acc: 98.65, Run_loss:1.53
Batch