<a href="https://colab.research.google.com/github/vidushiMaheshwari/AIModels/blob/main/pretrained_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Timm Models

In [None]:
!pip install timm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm
  Downloading timm-0.5.4-py3-none-any.whl (431 kB)
[K     |████████████████████████████████| 431 kB 8.4 MB/s 
Installing collected packages: timm
Successfully installed timm-0.5.4


In [None]:
import torch
import torchvision
import pprint
import timm
import torch.nn as nn
import time
from sklearn.model_selection import train_test_split
import numpy as np
from torch.utils.data import DataLoader
from torchvision import transforms

In [None]:
batch_size = 64
dataset = torchvision.datasets.FashionMNIST(root= '/content/fashionMNIST', 
                                            download = True,
                                            transform=transforms.Compose([transforms.Resize((224, 224)),
                                                                          transforms.ToTensor(),
                                                                          transforms.Lambda(lambda x: x.repeat(3, 1, 1) if x.size(0) == 1 else x)]),
                                            train= True)
model_name = 'mobilenetv2_100'
num_epochs = 40
learning_rate = 0.0006918309954926372


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
new_dataset = [dataset[i] for i in range(10000)]

In [None]:
train_dataset, valid_dataset = train_test_split(new_dataset, train_size=0.8, shuffle=False)

In [None]:
train_loader, valid_loader = DataLoader(train_dataset, batch_size=64, shuffle=True), DataLoader(valid_dataset, batch_size=batch_size, shuffle=True)

In [None]:
class CFG:
  DEBUG = False

  ### input: not configurable
  IMG_HEIGHT = 28
  IMG_WIDTH = 28
  N_CLASS = 10

  ### split train and validation sets
  split_fraction = 0.8

  ### model
  model_name = model_name # 'resnet34', 'resnet200d', 'efficientnet_b1_pruned', 'efficientnetv2_m', efficientnet_b7 ...  

  ### training
  print_freq = 100
  BATCH_SIZE = batch_size
  N_EPOCHS = num_epochs

  ### set only one to True
  save_best_loss = False
  save_best_accuracy = True

  ### optimizer
  # optimizer = 'adam'
  # optimizer = 'adamw'
  optimizer = 'rmsprop'
  LEARNING_RATE = learning_rate
  weight_decay = 0.1 # for adamw
  l2_penalty = 0.01 # for RMSprop
  rms_momentum = 0 # for RMSprop

  ### learning rate scheduler (LRS)
  scheduler = 'ReduceLROnPlateau'
  # scheduler = 'CosineAnnealingLR'
  plateau_factor = 0.5
  plateau_patience = 3
  cosine_T_max = 4
  cosine_eta_min = 1e-8
  verbose = True

  ### train and validation DataLoaders
  shuffle = False

  ### albumentations
  probability = 0.6

  random_seed = 42

In [None]:
timm.list_models()

In [None]:
class GetModel(nn.Module):
    def __init__(self, model_name = CFG.model_name, pretrained = True):
        super().__init__()

        self.model_name = model_name
        self.cnn = timm.create_model(self.model_name, pretrained = pretrained, num_classes = CFG.N_CLASS, in_chans =3)

    def forward(self, x):
        x = self.cnn(x)
        return x

model = GetModel()

In [None]:
def get_optimizer(lr=CFG.LEARNING_RATE): ## CFG.LEARNING_RATE, CFG.optimizer
  if CFG.optimizer == 'adam':
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=CFG.weight_decay, amsgrad=False)
  elif CFG.optimizer == 'adamw':
    optimizer = torch.optim.AnadW(model.parameters(), lr=lr, weight_decay=CFG.weight_decay)
  elif CFG.optimizer == 'rmsprop':
      optimizer = torch.optim.RMSprop(model.parameters(), lr = lr, weight_decay = CFG.l2_penalty, momentum = CFG.rms_momentum)

  else:
      print('Optimizer is not defined')      

  return optimizer

In [None]:
def get_scheduler(optimizer):

  if CFG.scheduler=='ReduceLROnPlateau':
      scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor = CFG.plateau_factor, patience = CFG.plateau_patience, verbose = CFG.verbose)

  elif CFG.scheduler=='CosineAnnealingLR':
      scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = CFG.cosine_T_max, eta_min = CFG.cosine_eta_min)

  else:
      print('LR Scheduler is not defined')

  return scheduler

In [None]:
def train_model(train_loader, model, device, optimizer=None, criterion=None):
  model = model.to(device)
  model.train()

  size = len(train_loader.dataset)
  num_batches = len(train_loader)

  loss, correct = 0, 0

  for batch, (x, y) in enumerate(train_loader):
    start = time.time()
    ## batch is the batch number starting from 0
    ## x holds the tensor value sof all image data
    ## y hols the labels of all images
    device = torch.device(device)
    x, y = x.to(device), y.to(device)

    if optimizer is not None:
      optimizer.zero_grad()
    pred = model(x)
    loss = criterion(pred, y.long().squeeze())
    current = batch * len(x)

    loss.backward()
    if optimizer is not None:
      optimizer.step()
    y_pred, y_true = torch.argmax(pred, axis=1), y.long().squeeze()
    correct += (y_pred == y_true).type(torch.float).sum().item()

    end = time.time()
    time_delta = np.round(end-start, 3) 
    loss, current = np.round(loss.item(), 5), batch * len(x)

  correct /= size
  loss /= num_batches

  pprint.pprint(f"Train: Accuracy: {(100*correct):>0.2f}%, Avg loss: {loss:>5f} \n")

  return loss, correct

In [None]:
def valid_model(valid_loader, model, criterion=None, device=device):
  model = model.to(device)
  model.eval()

  size = len(valid_loader.dataset)
  num_batches = len(valid_loader)

  loss, correct = 0, 0
  with torch.no_grad(): # disable gradients
    for batch, (X, y) in enumerate(valid_loader):

      start = time.time()
  
      device = torch.device(device)
      X, y = X.to(device), y.to(device)

      # compute predictions and loss
      pred = model(X)
      loss = criterion(pred, y.long().squeeze()) 
      current = batch * len(X)
      
      # sum correct predictions
      y_pred, y_true = torch.argmax(pred, axis=1), y.long().squeeze()
      correct += (y_pred == y_true).type(torch.float).sum().item()

      end = time.time()
      time_delta = np.round(end - start, 3)
      
      # log
      loss, current = np.round(loss.item(), 5), batch * len(X)
      # if batch % (CFG.print_freq) == 0:
      #   print(f"Valid Batch: {current:>5d}/{size:>5d}: loss: {loss:>5f} Elapsed Time: {time_delta} s")

  # metrics: calculate accuracy and loss for epoch (all batches)
  correct /= size # epoch accuracy
  loss /= num_batches # epoch loss

  print(f"Valid: Accuracy: {(100*correct):>0.2f}%, Avg loss: {loss:>5f} \n")

  return loss, correct

 Running Training

In [None]:
loss_fn = nn.CrossEntropyLoss()

device = device
model = model

optimizer = get_optimizer(learning_rate)
scheduler = get_scheduler(optimizer)

In [None]:
###### SET PARAMETERS HERE ###
num_epochs = 40

#-----------------------------#
train_loss_history = []
train_acc_history = []
valid_loss_history = []
valid_acc_history = []
LR_history = []

best_loss = np.inf
best_epoch_loss = 0
best_acc = 0
best_epoch_acc = 0

start_train_time = time.time()

for epoch in range(0, num_epochs):
  print(f"\n-------------------------------   Epoch {epoch + 1}   -------------------------------\n")
  start_epoch_time = time.time()

  # train
  train_loss, train_acc = train_model(train_loader, model, criterion=loss_fn, optimizer=optimizer, device=device)
  train_loss_history.append(train_loss)
  train_acc_history.append(train_acc)

  # validation
  valid_loss, valid_acc = valid_model(valid_loader, model, loss_fn, device)
  valid_loss_history.append(valid_loss)
  valid_acc_history.append(valid_acc)

  # apply LR scheduler after each epoch
  if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
      scheduler.step(valid_loss)

  elif isinstance(scheduler, torch.optim.lr_scheduler.CosineAnnealingLR):
      scheduler.step()

  # save LR value to plot later
  for param_group in optimizer.param_groups:
    LR_history.append(param_group['lr'])

  # save validation loss if it was improved (reduced)
  if valid_loss < best_loss:
    best_epoch_loss = epoch + 1
    best_loss = valid_loss
    # if CFG.save_best_loss:
    #   # save the model's weights and biases only if CFG.save_best_loss == True
    #   torch.save(model.state_dict(), OUTPUT_PATH + f"DigitModel_ep{best_epoch_loss}.pth")

  # save validation accuracy if it was improved (increased)
  if valid_acc > best_acc:
    best_epoch_acc = epoch + 1
    best_acc = valid_acc
    # if CFG.save_best_accuracy:
    #   # save the model's weights and biases only if CFG.save_best_accuracy == True
    #   torch.save(model.state_dict(), OUTPUT_PATH + f"DigitModel_ep{best_epoch_acc}.pth")    

  end_epoch_time = time.time()
  time_delta = np.round(end_epoch_time - start_epoch_time, 3)
  print("\n\nEpoch Elapsed Time: {} s".format(time_delta))

end_train_time = time.time()
print("\n\nTotal Elapsed Time: {} min".format(np.round((end_train_time - start_train_time)/60, 3)))
print("Done!")


6.336 min


12.575 min

VGG 16 -> 158 * 40 seconds

vit_small_patch32_224_in21k :: 21.4 sec / epoch

mobilenetv2_100 -> 35 seconds / epoch

# Pytorch

In [None]:
import torch
import torchvision
import numpy as np
import torch.nn as nn
import torch.optim
from torch.utils.data import DataLoader
from torchvision import transforms
import cv2
from torch.autograd import Variable
import time
import pprint
from sklearn.model_selection import train_test_split

In [None]:
model_name = 'googlenet'
# dataset = torchvision.datasets.FashionMNIST(root= '/content/fashionMNIST', 
#                                             download = True,
#                                             transform=transforms.Compose([transforms.Resize((224, 224)),
#                                                                           transforms.ToTensor(),
#                                                                           transforms.Lambda(lambda x: x.repeat(3, 1, 1) if x.size(0) == 1 else x)]),        
#                                             train= False)
# learning_rate = 0.00069


model = torch.hub.load('pytorch/vision:v0.10.0', model_name, pretrained=True)
model.eval()

# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

Downloading: "https://github.com/pytorch/vision/archive/v0.10.0.zip" to /root/.cache/torch/hub/v0.10.0.zip
Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to /root/.cache/torch/hub/checkpoints/googlenet-1378be20.pth


  0%|          | 0.00/49.7M [00:00<?, ?B/s]

In [None]:
dataset = [dataset[i] for i in range(10000)]

In [None]:
train_dataset,valid_dataset = train_test_split(dataset, train_size=0.8)
train_loader, valid_loader = DataLoader(train_dataset, batch_size=64, shuffle=True), DataLoader(valid_dataset, batch_size=64, shuffle=True)

In [None]:
def train_model(train_loader, model, num_epochs = 40, criterion=None, optimizer=None, device=device):
  model = model.to(device)
  model.train()

  size = len(train_loader.dataset)
  num_batches = len(train_loader)

  loss, correct = 0, 0
  num_batched = len(train_loader)
  for batch, (x, y) in enumerate(train_loader):
    start = time.time()
    ## batch is the batch number starting from 0
    ## x holds the tensor value sof all image data
    ## y hols the labels of all images
    device = torch.device(device)
    x, y = x.to(device), y.to(device)

    if optimizer is not None:
      optimizer.zero_grad()
    pred = model(x)
    loss = criterion(pred, y.long().squeeze())
    current = batch * len(x)

    loss.backward()
    if optimizer is not None:
      optimizer.step()
    y_pred, y_true = torch.argmax(pred, axis=1), y.long().squeeze()
    correct += (y_pred == y_true).type(torch.float).sum().item()

    end = time.time()
    time_delta = np.round(end-start, 3) 
    loss, current = np.round(loss.item(), 5), batch * len(x)

  correct /= size
  loss /= num_batches

  pprint.pprint(f"Train: Accuracy: {(100*correct):>0.2f}%, Avg loss: {loss:>5f} \n")

  return loss, correct

In [None]:
def valid_model(valid_loader, model, criterion=None, device=device):
  model = model.to(device)
  model.eval()

  size = len(valid_loader.dataset)
  num_batches = len(valid_loader)

  loss, correct = 0, 0
  with torch.no_grad(): # disable gradients
    for batch, (X, y) in enumerate(valid_loader):

      start = time.time()
  
      device = torch.device(device)
      X, y = X.to(device), y.to(device)

      # compute predictions and loss
      pred = model(X)
      loss = criterion(pred, y.long().squeeze()) 
      current = batch * len(X)
      
      # sum correct predictions
      y_pred, y_true = torch.argmax(pred, axis=1), y.long().squeeze()
      correct += (y_pred == y_true).type(torch.float).sum().item()

      end = time.time()
      time_delta = np.round(end - start, 3)
      
      # log
      loss, current = np.round(loss.item(), 5), batch * len(X)
      # if batch % (CFG.print_freq) == 0:
      #   print(f"Valid Batch: {current:>5d}/{size:>5d}: loss: {loss:>5f} Elapsed Time: {time_delta} s")

  # metrics: calculate accuracy and loss for epoch (all batches)
  correct /= size # epoch accuracy
  loss /= num_batches # epoch loss

  print(f"Valid: Accuracy: {(100*correct):>0.2f}%, Avg loss: {loss:>5f} \n")

  return loss, correct

In [None]:
def get_optimizer(lr, optimizer, weight_decay, model, momentum):
  if optimizer == 'adam':
    return(torch.optim.Adam(model.paramters(), lr=lr, weight_decay=weight_decay, amsgrad=False))
  elif optimizer == 'adamw':
    return(torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay))
  elif optimizer == 'rmsprop':
    return(torch.optim.RMSprop(model.parameters(), lr=lr, weight_decay=weight_decay, momentum=momentum))
  

In [None]:
def get_scheduler(optimizer, scheduler, plateau_factor, verbose, cosine_T_max, cosine_eta_min):
  if scheduler == 'ReduceLROnPlateau':
    return(torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=plateau_factor, verbose=verbose))
  elif scheduler == 'CosineAnnealingLR':
    return(torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = cosine_T_max, eta_min=cosine_eta_min))

In [None]:
loss_fn = nn.CrossEntropyLoss()

device = device
model = model

optimizer = get_optimizer(learning_rate, 'rmsprop', 0.1, model, 0)
scheduler = get_scheduler(optimizer, 'ReduceLROnPlateau', 0.5, True, 4, 1e-8)

In [None]:
###### SET PARAMETERS HERE ###
num_epochs = 40

#-----------------------------#
train_loss_history = []
train_acc_history = []
valid_loss_history = []
valid_acc_history = []
LR_history = []

best_loss = np.inf
best_epoch_loss = 0
best_acc = 0
best_epoch_acc = 0

start_train_time = time.time()

for epoch in range(0, num_epochs):
  print(f"\n-------------------------------   Epoch {epoch + 1}   -------------------------------\n")
  start_epoch_time = time.time()

  # train
  train_loss, train_acc = train_model(train_loader, model, num_epochs = num_epochs ,criterion=loss_fn, optimizer=optimizer, device=device)
  train_loss_history.append(train_loss)
  train_acc_history.append(train_acc)

  # validation
  valid_loss, valid_acc = valid_model(valid_loader, model, loss_fn, device)
  valid_loss_history.append(valid_loss)
  valid_acc_history.append(valid_acc)

  # apply LR scheduler after each epoch
  if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
      scheduler.step(valid_loss)

  elif isinstance(scheduler, torch.optim.lr_scheduler.CosineAnnealingLR):
      scheduler.step()

  # save LR value to plot later
  for param_group in optimizer.param_groups:
    LR_history.append(param_group['lr'])

  # save validation loss if it was improved (reduced)
  if valid_loss < best_loss:
    best_epoch_loss = epoch + 1
    best_loss = valid_loss
    # if CFG.save_best_loss:
    #   # save the model's weights and biases only if CFG.save_best_loss == True
    #   torch.save(model.state_dict(), OUTPUT_PATH + f"DigitModel_ep{best_epoch_loss}.pth")

  # save validation accuracy if it was improved (increased)
  if valid_acc > best_acc:
    best_epoch_acc = epoch + 1
    best_acc = valid_acc
    # if CFG.save_best_accuracy:
    #   # save the model's weights and biases only if CFG.save_best_accuracy == True
    #   torch.save(model.state_dict(), OUTPUT_PATH + f"DigitModel_ep{best_epoch_acc}.pth")    

  end_epoch_time = time.time()
  time_delta = np.round(end_epoch_time - start_epoch_time, 3)
  print("\n\nEpoch Elapsed Time: {} s".format(time_delta))

end_train_time = time.time()
print("\n\nTotal Elapsed Time: {} min".format(np.round((end_train_time - start_train_time)/60, 3)))
print("Done!")


8000 Fashion MNIST => 6.521 min

VGG16 -> 114 seconds / epoch

Aleznet -> 7.189 min

GoogleNet -> 36 seconds / epoch


# Fast.ai

In [None]:
!pip install fastai

In [4]:
!pip install timm

Installing collected packages: timm
Successfully installed timm-0.5.4


In [1]:
import fastai

In [2]:
import torchvision
import torch
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

In [5]:
from fastai.vision.all import *
import timm

In [51]:
dataset = torchvision.datasets.FashionMNIST(root= '/content/fashionMNIST', 
                                            download = True,
                                            transform=transforms.Compose([transforms.Resize((224, 224)),
                                                                          transforms.ToTensor(),
                                                                          transforms.Lambda(lambda x: x.repeat(3, 1, 1) if x.size(0) == 1 else x)]),
                                            train= True)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /content/fashionMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting /content/fashionMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz to /content/fashionMNIST/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /content/fashionMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting /content/fashionMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /content/fashionMNIST/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /content/fashionMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting /content/fashionMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /content/fashionMNIST/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /content/fashionMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting /content/fashionMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /content/fashionMNIST/FashionMNIST/raw



In [52]:
dataset = [dataset[i] for i in range(10000)]
len(dataset)

10000

In [53]:
train_dataset,valid_dataset = train_test_split(dataset, train_size=0.8)

In [None]:
for i in train_dataset:
    print((i[0].shape))
    break

torch.Size([3, 224, 224])


In [None]:
del dataset

In [54]:
from fastai.data.core import DataLoaders
import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
dls = DataLoaders.from_dsets(train_dataset, valid_dataset, device=device)

cuda:0


If it is a pytorch model, continue else go to next block

In [None]:
model = torchvision.models.googlenet

NameError: ignored

In [None]:
model

<function torchvision.models.googlenet.googlenet>

In [None]:
from fastai.vision.all import *
from fastai.vision import *
learn = vision_learner(dls, model, n_out=10, loss_func=torch.nn.CrossEntropyLoss(), pretrained=True)

if it is a timm model

In [None]:
import fastai

In [None]:
!pip install timm

In [None]:
!pip install wwf

In [None]:
import timm

In [None]:
import wwf

In [None]:
print(model)

None


Does not have already built weights

In [None]:
model = timm.models.helpers.load_custom_pretrained('vit_small_patch32_224_in21k', default_cfg=None, load_fn=None, progress=False, check_hash=False)

No pretrained weights exist for this model. Using random initialization.


In [None]:
learn = vision_learner(dls, model, n_out=10)

NameError: ignored

In [None]:
from timm import create_model
from fastai.vision.learner import _update_first_layer

meta = model_meta.get('vit_large_patch16_224', _default_meta)

NameError: ignored

In [None]:
import timm
timm.list_models(pretrained=True)

In [None]:
import timm
model = timm.create_model('vit_base_patch16_224', pretrained=True)
model.eval()



In [None]:
## incase of a ViT
ViT_model = timm.create_model('vit_large_patch16_224', pretrained=True, num_classes=10)
NN_model = timm.create_model('resnet34', pretrained=True, num_classes=10)

NameError: ignored

In [None]:
print(type(model))
print(type(NN_model))

timm.models.resnet.ResNet

In [None]:
import torchvision
from torchvision.models.vision_transformer import *
mode = torchvision.models.vision_transformer._vision_transformer()

AttributeError: ignored

In [None]:
# create_fn = timm.models.factory.model_entrypoint('vit_large_patch16_224')
# for i in create_fn():
#   print(i)
#   break
# with timm.models.factory.set_layer_config(scriptable=None, exportable=None, no_jit=None):
#   model = 


## Vision Transformer class 

TypeError: ignored

In [None]:
 with timm.models.factory.set_layer_config():
   new_model = create_fn(global_pool = 'avg')

TypeError: ignored

In [None]:
model

In [None]:
from wwf.vision.timm import *
import timm
from fastai.vision import *
from fastai.vision.all import *
learn = timm_learner(dls, 'vit_large_patch16_224', n_out=10, loss_func=torch.nn.CrossEntropyLoss(), pretrained=False)

In [None]:
learn.to(device)
dls.to(device)

NameError: ignored

In [None]:
learn.lr_find()

In [None]:
import time
start = time.time()
learn.fit(40, lr=0.0006918309954926372)
end = time.time()
print(end - start)

epoch,train_loss,valid_loss,time
0,0.81089,0.507932,00:29
1,0.548327,0.471854,00:28
2,0.394305,0.445984,00:29
3,0.342733,0.452206,00:29
4,0.285963,0.444868,00:29
5,0.249258,0.444358,00:29
6,0.211657,0.462455,00:29
7,0.170773,0.45134,00:29


KeyboardInterrupt: ignored

------------------------------------------------------------------------

8000 images Fashion MNIST => 15.9 min

VGG -> 51 sec/epoch

VGG_bn -> 104 sec/ epoch

Alexnet -> 283.59 seconds for 40 epochs

GoogleNet -> 27 seconds per epoch

MobileNetV2_100 -> 29 seconds / epoch

# FAST AI with transformers DEBUG

Look at github:: https://github.com/walkwithfastai/walkwithfastai.github.io/blob/master/wwf/vision/timm.py 

In [67]:
!pip install timm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [72]:
import timm
from timm import create_model

my_learner = timm_learner(dls, 'vit_large_patch16_224', loss_func=nn.CrossEntropyLoss(), n_out=10)

here
'works'
hmmm
oooo
alright
yesss
makes sense


In [None]:
my_learner.fit(40)

In [69]:
import torch.nn as nn
import pprint

from fastai.vision.learner import has_pool_type
from fastai.vision.learner import _update_first_layer

def create_timm_body(arch:str, pretrained=True, cut=None, n_in=3):
    "Creates a body from any model in the `timm` library."
    model = create_model(arch, pretrained=pretrained, num_classes=10)
    _update_first_layer(model, n_in, pretrained)
    if cut is None:
        try:
          ll = list(enumerate(model.children()))
          cut = next(i for i,o in reversed(ll) if has_pool_type(o)) ## i is the layer number and o is the type
        except StopIteration:
            cut = -1
            pass        
    if isinstance(cut, int): return nn.Sequential(*list(model.children())[:cut])
    elif callable(cut): return cut(model)
    else: raise NameError("cut must be either integer or function")

def create_timm_model(arch:str, n_out, cut=None, pretrained=True, n_in=3, init=nn.init.kaiming_normal_, custom_head=None,
                     concat_pool=True, **kwargs):
    "Create custom architecture using `arch`, `n_in` and `n_out` from the `timm` library"
    print("here")
    body = create_timm_body(arch, pretrained, None, n_in)
    print("hmmm")
    if custom_head is None:
        print("oooo")
        nf = get_num_features(body)
        # # nf = num_features_model(nn.Sequential(bodny.modules()))
        # # nf = num_features_model(*list(body.children()))
        # nf = 3072
        print("alright")
        head = create_head(nf, n_out, concat_pool=concat_pool, **kwargs)
    else: head = custom_head
    print("yesss")
    model = nn.Sequential(body, head)
    print("makes sense")
    if init is not None: apply_init(model[1], init)
    return model

def timm_learner(dls, arch:str, loss_func=None, pretrained=True, cut=None, splitter=None,
                y_range=None, config=None, n_out=None, normalize=True, **kwargs):
    "Build a convnet style learner from `dls` and `arch` using the `timm` library"
    if config is None: config = {}
    if n_out is None: n_out = get_c(dls)
    assert n_out, "`n_out` is not defined, and could not be inferred from data, set `dls.c` or pass `n_out`"
    if y_range is None and 'y_range' in config: y_range = config.pop('y_range')
    model = create_timm_model(arch, n_out, default_split, pretrained, y_range=y_range, **config)
    learn = Learner(dls, model, loss_func=loss_func, splitter=default_split, **kwargs)
    if pretrained: learn.freeze()
    return learn

In [63]:
from fastai.vision.all import *

def get_num_features(body):
  num_features = -1
  try:
    return(num_features_model(nn.Sequential(*body.children())))
  except:
    for i in range(len(body)):
      layer = body[-i+1]
      if isinstance(layer, torch.nn.Sequential):
        for block in layer:
          
          for sublayer in block.children():
            if isinstance(sublayer, timm.models.vision_transformer.Attention):
              for ll in sublayer.children():
                if isinstance(ll, torch.nn.modules.linear.Linear):
                  return ll.out_features
          break
        break

In [38]:
num_param = 0
for i in range(len(my_body)):
  layer = my_body[-i+1]
  if isinstance(layer, torch.nn.Sequential):
    for block in layer:
      print(type(block))

      for j in block.children():
        if isinstance(j, timm.models.vision_transformer.Attention):
          for o_f in j.children():
            # print(type(o_f)) ## linear layers will give out_features
            if isinstance(o_f, torch.nn.modules.linear.Linear):
              print(o_f.out_features)
              break
          break
      # print(block)
      # print(block.get_parameter("dim"))
      # for param in block.parameters():
      #   print(type(param))
      #   num_param+=1
      #   print("----------")
      break
  print("-----")
print(num_param)


-----
-----
-----
-----
<class 'timm.models.vision_transformer.Block'>
3072
-----
0


In [64]:
# my_model = create_timm_model('vit_large_patch16_224', n_out=10)

my_body = create_timm_body('resnet34', pretrained=True)

In [46]:
for i in range(len(my_body)):
  layer = my_body[-i+1]
  if isinstance(layer, torch.nn.Sequential):
    for block in layer:
      print(type(block))
      break
    break

  print("-----------------")

-----------------
-----------------
<class 'timm.models.resnet.BasicBlock'>


In [None]:
my_model

In [None]:
import time = time
my_learner.fit(40, lr=0.006)

<fastai.learner.Learner object at 0x7f2fc925eed0>


In [None]:
model = timm.create_model('vit_large_patch16_224', pretrained=True, num_classes=10)

In [None]:
import timm

In [None]:
my_body = create_timm_body('vit_large_patch16_224')

In [None]:
i = 1
seq = None
while True and i <= len(list(my_body.children())):
    try:
        seq = num_features_model(list(my_body.children())[-i])
        break
    except:
        i += 1
print(i)
print(seq)

In [None]:
from fastai.vision.all import *

In [None]:
cnn_model = create_timm_model('resnet34', n_out=10)

In [None]:
cnn_body = create_timm_body('resnet34')

Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/resnet34-43635321.pth" to /root/.cache/torch/hub/checkpoints/resnet34-43635321.pth


In [None]:
i = 1
seq = None
while True and i <= len(list(cnn_body.children())):
    try:
        seq = num_features_model(list(cnn_body.children())[-i])
        break
    except:
        i += 1
print(i)
print(seq)

In [None]:
print(cnn_body)

In [None]:
my_body

In [None]:
childrens = my_body.children()
child_list = list(childrens)
new_list = []
index = 0
for i in range(-1 * len(child_list), 0):
    new_list.insert(index, child_list[i])    

In [None]:
layers = new_list[2][0].modules()

In [None]:
layers

<generator object Module.modules at 0x7f302494eb50>

In [None]:
for i in layers:
    j = i.modules()
    print(j)
    for k in j:
        print(k)
    print("-----------------------")

In [None]:
childrens

<generator object Module.children at 0x7fbc027afc50>

In [None]:
seq = list(childrens)

In [None]:
print(num_features_model(seq))

AttributeError: ignored

In [None]:
for i in childrens:
    print(i)
    print("-------------------VIDUSHI-------------------")

Sequential(
  (0): PatchEmbed(
    (proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (1): Dropout(p=0.0, inplace=False)
)

In [None]:
model = create_model('resnet34')

In [None]:
model

In [None]:
body = create_timm_body('resnet34', cut=5)

In [None]:
body

In [None]:
ll = list(enumerate(model.children()))
ll = reversed(ll)

for i in ll:
  print(i)
# cut = None
# try:
#   cut = next(i for i, o in reversed(ll) if has_pool_type(o))
# except StopIteration:
#   pass
# print(cut)

(5, Linear(in_features=1024, out_features=10, bias=True))
(4, Identity())
(3, LayerNorm((1024,), eps=1e-06, elementwise_affine=True))
(2, Sequential(
  (0): Block(
    (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
    (attn): Attention(
      (qkv): Linear(in_features=1024, out_features=3072, bias=True)
      (attn_drop): Dropout(p=0.0, inplace=False)
      (proj): Linear(in_features=1024, out_features=1024, bias=True)
      (proj_drop): Dropout(p=0.0, inplace=False)
    )
    (drop_path): Identity()
    (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
    (mlp): Mlp(
      (fc1): Linear(in_features=1024, out_features=4096, bias=True)
      (act): GELU()
      (drop1): Dropout(p=0.0, inplace=False)
      (fc2): Linear(in_features=4096, out_features=1024, bias=True)
      (drop2): Dropout(p=0.0, inplace=False)
    )
  )
  (1): Block(
    (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
    (attn): Attention(
      (qkv): Linear(in_features

In [None]:
from fastai.vision.learner import _add_norm

In [None]:
!pip install timm

In [None]:
from fastai.vision.all import *
# Cell
from fastai.vision.learner import _update_first_layer

In [None]:
ViT_learner = timm_learner(dls, 'vit_large_patch16_224', n_out=10, loss_func=nn.CrossEntropyLoss())

StopIteration: ignored

In [None]:
NN_learner = timm_learner(dls, 'resnet34', n_out=10, loss_func=nn.CrossEntropyLoss())

In [None]:
import time
start = time.time()
ViT_learner.fit(40, lr=0.0006918309954926372)
end = time.time()
print(end - start)

epoch,train_loss,valid_loss,time


In [None]:
import time
start = time.time()
NN_learner.fit(40, lr=0.0006918309954926372)
end = time.time()
print(end - start)