In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.utils.data as data

from model import ResidualBlock, ResNet
from train_eval_util import train, evaluate, calculate_accuracy, epoch_time
from dataset import train_data, valid_data, test_data

import time

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [2]:
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')  
else:
    device = torch.device('cpu')

print(f"Selected device: {device}")

Selected device: cuda


In [3]:
BATCH_SIZE = 64

train_iterator = DataLoader(train_data, batch_size= BATCH_SIZE, shuffle=True)

valid_iterator =  DataLoader(valid_data, batch_size=BATCH_SIZE, shuffle=False)

test_iterator =  DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=True)

In [4]:
model = ResNet(ResidualBlock, [18, 18, 18], dropout_rate=0.5).to(device)

total_layers = sum([1 for _ in model.modules() 
    if isinstance(_, nn.Conv2d) or isinstance(_, nn.Linear)]) - 2 # subtract input and output layers
    
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Total number of layers: {total_layers}")
print(f"Total number of parameters: {total_params}")

from torchsummary import summary
summary(model)

Total number of layers: 110
Total number of parameters: 1730714
Layer (type:depth-idx)                   Param #
├─Conv2d: 1-1                            432
├─BatchNorm2d: 1-2                       32
├─Sequential: 1-3                        --
|    └─ResidualBlock: 2-1                --
|    |    └─Conv2d: 3-1                  2,304
|    |    └─BatchNorm2d: 3-2             32
|    |    └─Conv2d: 3-3                  2,304
|    |    └─BatchNorm2d: 3-4             32
|    |    └─Dropout: 3-5                 --
|    |    └─Sequential: 3-6              --
|    └─ResidualBlock: 2-2                --
|    |    └─Conv2d: 3-7                  2,304
|    |    └─BatchNorm2d: 3-8             32
|    |    └─Conv2d: 3-9                  2,304
|    |    └─BatchNorm2d: 3-10            32
|    |    └─Dropout: 3-11                --
|    |    └─Sequential: 3-12             --
|    └─ResidualBlock: 2-3                --
|    |    └─Conv2d: 3-13                 2,304
|    |    └─BatchNorm2d: 3-14      

Layer (type:depth-idx)                   Param #
├─Conv2d: 1-1                            432
├─BatchNorm2d: 1-2                       32
├─Sequential: 1-3                        --
|    └─ResidualBlock: 2-1                --
|    |    └─Conv2d: 3-1                  2,304
|    |    └─BatchNorm2d: 3-2             32
|    |    └─Conv2d: 3-3                  2,304
|    |    └─BatchNorm2d: 3-4             32
|    |    └─Dropout: 3-5                 --
|    |    └─Sequential: 3-6              --
|    └─ResidualBlock: 2-2                --
|    |    └─Conv2d: 3-7                  2,304
|    |    └─BatchNorm2d: 3-8             32
|    |    └─Conv2d: 3-9                  2,304
|    |    └─BatchNorm2d: 3-10            32
|    |    └─Dropout: 3-11                --
|    |    └─Sequential: 3-12             --
|    └─ResidualBlock: 2-3                --
|    |    └─Conv2d: 3-13                 2,304
|    |    └─BatchNorm2d: 3-14            32
|    |    └─Conv2d: 3-15                 2,304
|    |  

In [5]:

criterion = nn.CrossEntropyLoss().to(device)
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay = 0.0001)

# from torch.optim.lr_scheduler import StepLR
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)



In [6]:
num_epochs = 300
train_acc_history = []
train_loss_history = []
valid_acc_history = []
valid_loss_history = []
lr_epoch = []

for epoch in range(num_epochs):

    start_time = time.time()
    
   
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion, device)
    
    scheduler.step()
    lr_now = lr_now = optimizer.param_groups[0]['lr']
    lr_epoch.append(lr_now)

    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion, device)
        
    end_time = time.time()

        
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}% | LR: {lr_now:.6f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')
    

    train_acc_history.append(train_acc)
    train_loss_history.append( train_loss)
    valid_acc_history.append(valid_acc)
    valid_loss_history.append(valid_loss)

Epoch: 01 | Epoch Time: 0m 56s
	Train Loss: 2.417 | Train Acc: 12.10% | LR: 0.099994
	 Val. Loss: 2.195 |  Val. Acc: 17.44%
Epoch: 02 | Epoch Time: 0m 55s
	Train Loss: 2.032 | Train Acc: 24.23% | LR: 0.099975
	 Val. Loss: 2.030 |  Val. Acc: 28.01%
Epoch: 03 | Epoch Time: 0m 55s
	Train Loss: 1.820 | Train Acc: 31.25% | LR: 0.099944
	 Val. Loss: 1.740 |  Val. Acc: 36.08%
Epoch: 04 | Epoch Time: 0m 55s
	Train Loss: 1.670 | Train Acc: 37.22% | LR: 0.099901
	 Val. Loss: 1.637 |  Val. Acc: 39.34%
Epoch: 05 | Epoch Time: 0m 55s
	Train Loss: 1.553 | Train Acc: 42.62% | LR: 0.099846
	 Val. Loss: 1.510 |  Val. Acc: 45.25%
Epoch: 06 | Epoch Time: 0m 55s
	Train Loss: 1.442 | Train Acc: 46.66% | LR: 0.099778
	 Val. Loss: 1.545 |  Val. Acc: 45.83%


In [None]:
import matplotlib.pyplot as plt

fig,(ax1, ax2) = plt.subplots(1, 2, figsize=(10, 3))

ax1.plot(range(num_epochs), train_loss_history, '--r')
ax1.plot(range(num_epochs), valid_loss_history, '-g')
ax1.set_xlabel('Epochs')
ax1.set_ylabel('Loss')
ax1.legend(['train', 'valid'])

ax2.plot(range(num_epochs), train_acc_history, '--r')
ax2.plot(range(num_epochs), valid_acc_history, '-g')
ax2.set_xlabel('Epochs')
ax2.set_xlabel('Accuracy')
ax1.legend(['train', 'valid'])