<a href="https://colab.research.google.com/github/vishal-burman/PyTorch-Architectures/blob/master/modeling_ShuffleNet/test_sample_ShuffleNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Print the nvidia-smi report so the reader can see which GPU (if any) is
# attached to this runtime before training starts.
! nvidia-smi

In [None]:
! pip install datasets
! pip install wget

In [None]:
# Fetch a fresh copy of the repository and make it the working directory so
# that the `toolkit` and `modeling_ShuffleNet` packages can be imported below.
# NOTE(review): every path here is relative. After `%cd` has run once, the
# working directory is inside the clone, so re-running this cell without
# restarting the runtime looks like it would clone a nested copy — confirm.
! rm -rf PyTorch-Architectures/
! git clone https://github.com/vishal-burman/PyTorch-Architectures.git
%cd PyTorch-Architectures/

In [2]:
from tqdm.auto import tqdm
import torch
from toolkit.custom_dataset_cv import DataLoaderCIFAR10Classification
from toolkit.metrics import cv_compute_accuracy
from toolkit.utils import get_optimal_batchsize, dict_to_device, EarlyStopping
from modeling_ShuffleNet.model import ShuffleNet
from modeling_ShuffleNet.config import ShuffleNetConfig

In [None]:
# Seed PyTorch's global RNG before the model is built so that weight
# initialisation (and any other draw from torch's global generator) repeats
# across Restart -> Run All. This does not by itself make every CUDA kernel
# deterministic.
SEED = 42
torch.manual_seed(SEED)

# Use the first CUDA GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
config = ShuffleNetConfig()  # constructed with no arguments, i.e. its defaults
model = ShuffleNet(config)
model.to(device)  # last expression of the cell, so its return value is displayed

In [4]:
# Count only the parameters that have requires_grad set, i.e. the ones that
# will accumulate gradients during training.
trainable_sizes = [weight.numel() for weight in model.parameters() if weight.requires_grad]
params = sum(trainable_sizes)
print('Trainable Parameters: ', params)

Trainable Parameters:  960706


In [5]:
# Build the CIFAR-10 wrappers for the training (train=True) and validation
# (train=False) splits. resize=224 is presumably the target image side
# length — confirm in toolkit.custom_dataset_cv.
# A later cell calls `.return_dataloader(...)` on each object and rebinds
# these same names to the result.
train_loader = DataLoaderCIFAR10Classification(resize=224, train=True)
valid_loader = DataLoaderCIFAR10Classification(resize=224, train=False)

cifar10 extracted...
cifar10 exists...


In [6]:
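# Batch size was chosen with: get_optimal_batchsize(train_loader.dataset, model) --> 256
# (the call is left commented out so it is not re-run on every execution)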

In [7]:
# Hyperparameters
BS = 256  # batch size passed to return_dataloader for both splits
EPOCHS = 100  # upper bound on training epochs; early stopping may end the run sooner
LR = 5e-3  # learning rate passed to torch.optim.AdamW

In [8]:
# Turn the dataset wrappers into batched loaders; only the training split is
# shuffled. The names train_loader / valid_loader are rebound to the results.
# NOTE(review): because the names are reused, re-running this cell without
# first re-running the cell that builds the wrappers calls
# `.return_dataloader` on the previous result — presumably an AttributeError;
# confirm.
train_loader = train_loader.return_dataloader(batch_size=BS, shuffle=True)
valid_loader = valid_loader.return_dataloader(batch_size=BS, shuffle=False)
# len(...) is presumably the number of batches per epoch for each split.
print('Length of Train Loader: ', len(train_loader))
print('Length of Valid Loader: ', len(valid_loader))

Length of Train Loader:  196
Length of Valid Loader:  40


In [9]:
# Sanity check: push a single training batch through the model in eval mode
# with gradient tracking disabled, then print the logits shape and the loss.
model.eval()
with torch.no_grad():
  for sample in train_loader:
    sample_on_device = dict_to_device(sample, device)
    outputs = model(**sample_on_device)
    # The model output is indexed positionally: loss first, logits second.
    loss = outputs[0]
    logits = outputs[1]
    print(logits.shape, loss.item())
    break  # one batch is enough for the check

torch.Size([256, 10]) 1802.0545654296875


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [10]:
# Early-stopping helper from toolkit.utils. The training loop calls it once
# per epoch with the validation accuracy and the model, then reads its
# `.early_stop` flag to decide whether to break.
# NOTE(review): the recorded run counts "out of 3", so the default patience
# is presumably 3 non-improving epochs — confirm in toolkit.utils.
early_stop = EarlyStopping(metric="val_accuracy", verbose=True)

In [11]:
# AdamW over every model parameter at learning rate LR; all other AdamW
# settings are left at the PyTorch defaults.
optimizer = torch.optim.AdamW(model.parameters(), lr=LR)

In [12]:
# Total optimizer steps if every epoch runs; used only to size the progress bar.
num_training_steps = EPOCHS * len(train_loader)
progress_bar = tqdm(range(num_training_steps))

for epoch in range(EPOCHS):
  # --- training pass: one optimizer step per batch ---
  model.train()
  for sample in train_loader:
    sample_on_device = dict_to_device(sample, device)
    outputs = model(**sample_on_device)
    loss = outputs[0]  # the model output is indexed positionally; loss comes first
    loss.backward()

    optimizer.step()
    optimizer.zero_grad()  # clear gradients so they do not accumulate into the next step
    progress_bar.update(1)

  # --- validation pass: score the model, then let EarlyStopping decide ---
  model.eval()
  with torch.no_grad():
    valid_acc = cv_compute_accuracy(model, valid_loader, device)
    early_stop(valid_acc, model)
    if early_stop.early_stop:
      print("Early Stopping!")
      break

HBox(children=(FloatProgress(value=0.0, max=19600.0), HTML(value='')))

Validation accuracy increased from -inf% to 55.09%
Validation accuracy increased from 55.09% to 71.22%
Validation accuracy increased from 71.22% to 72.97%
Validation accuracy increased from 72.97% to 78.06%
Validation accuracy increased from 78.06% to 80.14%
EarlyStopping counter: 1 out of 3
Validation accuracy increased from 80.14% to 81.25%
Validation accuracy increased from 81.25% to 81.63%
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 2 out of 3
EarlyStopping counter: 3 out of 3
Early Stopping!
