<a href="https://colab.research.google.com/github/vjhawar12/FreshNET-A-mobileNET-adaptation/blob/main/FreshNET.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [None]:
!pip -q install lightning-bolts

In [None]:
import torch
import torch.nn as nn
from torch import optim
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
from pl_bolts.optimizers.lr_scheduler import LinearWarmupCosineAnnealingLR
from copy import deepcopy
from torch.quantization import QuantStub, DeQuantStub, prepare_qat, get_default_qat_qconfig, convert, fuse_modules

# Loading dataset from Google Cloud Storage

In [None]:
!gcloud auth login

In [None]:
!gcloud auth application-default login

In [None]:
!gcloud config set project freshnet-466505

In [6]:
def download_bucket_with_transfer_manager(bucket_name, destination_directory="", workers=8, max_results=1000):

    from google.cloud.storage import Client, transfer_manager

    client = Client()
    bucket = client.bucket(bucket_name)

    blob_names = [blob.name for blob in bucket.list_blobs(max_results=max_results)]

    results = transfer_manager.download_many_to_path(
        bucket, blob_names, destination_directory=destination_directory, max_workers=workers
    )

    for name, result in zip(blob_names, results):
        if isinstance(result, Exception):
            print("Failed to download {} due to exception: {}".format(name, result))
        else:
            print("Downloaded {} to {}.".format(name, destination_directory + name))

download_bucket_with_transfer_manager("freshnet-images", destination_directory="/content/dataset/", workers=8, max_results=None)



Failed to download test/ due to exception: [Errno 21] Is a directory: '/content/dataset/test/'
Failed to download test/fresh/ due to exception: [Errno 21] Is a directory: '/content/dataset/test/fresh/'
Downloaded test/fresh/test_fresh_images.zip to /content/dataset/test/fresh/test_fresh_images.zip.
Failed to download test/rotten/ due to exception: [Errno 21] Is a directory: '/content/dataset/test/rotten/'
Downloaded test/rotten/test_rotten_images.zip to /content/dataset/test/rotten/test_rotten_images.zip.
Failed to download train/ due to exception: [Errno 21] Is a directory: '/content/dataset/train/'
Failed to download train/fresh/ due to exception: [Errno 21] Is a directory: '/content/dataset/train/fresh/'
Downloaded train/fresh/train_fresh_images.zip to /content/dataset/train/fresh/train_fresh_images.zip.
Failed to download train/rotten/ due to exception: [Errno 21] Is a directory: '/content/dataset/train/rotten/'
Downloaded train/rotten/train_rotten_images.zip to /content/dataset/tr

In [7]:
!cd /content/dataset/train/rotten && unzip -j train_rotten_images.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: DSCN4187.jpg_0_9314.jpg  
  inflating: rotated_by_15_Screen Shot 2018-06-12 at 8.50.15 PM.png  
  inflating: rotated_by_30_Screen Shot 2018-06-12 at 9.07.26 PM.png  
  inflating: a_r243.png              
  inflating: Copy of IMG_20200801_191339.jpg_0_9702.jpg  
  inflating: Copy of IMG_20200729_164219.jpg_0_7214.jpg  
  inflating: vertical_flip_Screen Shot 2018-06-12 at 11.23.19 PM.png  
  inflating: saltandpepper_Screen Shot 2018-06-07 at 2.20.56 PM.png  
  inflating: rotated_by_45_Screen Shot 2018-06-07 at 2.47.35 PM.png  
  inflating: c_r377.png              
  inflating: rotated_by_30_Screen Shot 2018-06-12 at 9.20.31 PM.png  
  inflating: Screen Shot 2018-06-12 at 9.12.11 PM.png  
  inflating: WhatsApp Image 2020-11-07 at 11.30.47 PM.jpg_0_1426.jpg  
  inflating: rotated_by_15_Screen Shot 2018-06-12 at 8.56.01 PM.png  
  inflating: p_r013.png              
  inflating: saltandpepper_Screen Shot 2018-06-1

In [8]:
!cd /content/dataset/train/fresh && unzip -j train_fresh_images.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: Day2.jpg_0_803.jpg      
  inflating: rotated_by_30_Screen Shot 2018-06-08 at 5.06.54 PM.png  
  inflating: Screen Shot 2018-06-12 at 11.59.19 PM.png  
  inflating: rotated_by_30_Screen Shot 2018-06-12 at 9.59.07 PM.png  
  inflating: translation_Screen Shot 2018-06-12 at 9.41.18 PM.png  
  inflating: b_f669.png              
  inflating: o_f066.png              
  inflating: o_f587.png              
  inflating: c_f217.png              
  inflating: p_f237.png              
  inflating: t_f479.png              
  inflating: c_f086.png              
  inflating: p_f242.png              
  inflating: tomato1_19.jpg_0_4842.jpg  
  inflating: rotated_by_60_Screen Shot 2018-06-12 at 9.41.26 PM.png  
  inflating: a_f221.png              
  inflating: saltandpepper_Screen Shot 2018-06-08 at 5.18.16 PM.png  
  inflating: Screen Shot 2018-06-12 at 9.51.24 PM.png  
  inflating: rotated_by_75_Screen Shot 2018-06-12 at 

In [9]:
!cd /content/dataset/test/fresh && unzip -j test_fresh_images.zip

Archive:  test_fresh_images.zip
  inflating: b_f304.png              
  inflating: o_f287.png              
  inflating: translation_Screen Shot 2018-06-08 at 5.25.28 PM.png  
  inflating: a_f175.png              
  inflating: o_f189.png              
  inflating: b_f452.png              
  inflating: rotated_by_75_Screen Shot 2018-06-12 at 10.04.49 PM.png  
  inflating: b_f200.png              
  inflating: rotated_by_60_Screen Shot 2018-06-12 at 11.50.33 PM.png  
  inflating: a_f119.png              
  inflating: rotated_by_75_Screen Shot 2018-06-13 at 12.00.06 AM.png  
  inflating: p_f014.png              
  inflating: rotated_by_45_Screen Shot 2018-06-12 at 9.40.26 PM.png  
  inflating: a_f349.png              
  inflating: t_f108.png              
  inflating: p_f027.png              
  inflating: t_f140.png              
  inflating: b_f355.png              
  inflating: a_f303.png              
  inflating: b_f310.png              
  inflating: rotated_by_60_Screen Shot 2018-06-

In [10]:
!cd /content/dataset/test/rotten && unzip -j test_rotten_images.zip

Archive:  test_rotten_images.zip
  inflating: p_r367.png              
  inflating: rotated_by_60_Screen Shot 2018-06-12 at 9.03.34 PM.png  
  inflating: t_r290.png              
  inflating: o_r117.png              
  inflating: o_r208.png              
  inflating: vertical_flip_Screen Shot 2018-06-12 at 11.32.17 PM.png  
  inflating: b_r316.png              
  inflating: rotated_by_30_Screen Shot 2018-06-07 at 2.46.04 PM.png  
  inflating: Screen Shot 2018-06-08 at 2.34.37 PM.png  
  inflating: b_r313.png              
  inflating: a_r225.png              
  inflating: o_r046.png              
  inflating: saltandpepper_Screen Shot 2018-06-12 at 8.52.01 PM.png  
  inflating: a_r285.png              
  inflating: b_r285.png              
  inflating: p_r052.png              
  inflating: rotated_by_45_Screen Shot 2018-06-12 at 11.29.26 PM.png  
  inflating: c_r033.png              
  inflating: o_r203.png              
  inflating: vertical_flip_Screen Shot 2018-06-12 at 9.27.15 PM.p

# Hyperparameters

In [11]:
BATCH_SIZE = 256
RANDOM_SEED = 42
TRAIN_SIZE, VAL_SIZE = 0.8, 0.2
IMG_SIZE = 320 # Original images are ~400*400 px so resizing them to 320 retains detail while reducing computational cost
EPOCHS = 30
WARMUP_DUR = 10 # num of warmup epochs
LR_MIN = 0.0001 # minimum learning rate
MILD_DROPOUT_RATE = 0.10 # mild enough to avoid overfitting on a small dataset

# Preprocessing

In [12]:
"""
    Mild preprocessing only. No MixUp, CutMix, RandomCrop, or ColorJitter because a lightweight model like this one
    is less likely to overfit. Also, this model needs to perform fine-grained classification,
    so aggressive augmentations could distort the small image regions (like signs of fungi or discoloration)
    that are crucial for accurate prediction.
"""

train_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation([-180, 180]),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),

])

test_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Loading data into Dataloader

In [13]:
path_to_train_imgs = "/content/dataset/train"
path_to_test_imgs = "/content/dataset/test"

full_train_data = ImageFolder(path_to_train_imgs, transform=train_transform)
test_data = ImageFolder(path_to_test_imgs, transform=test_transform)
train_data, val_data = random_split(full_train_data, [TRAIN_SIZE, VAL_SIZE], generator=torch.Generator().manual_seed(RANDOM_SEED))

print(f"{full_train_data.class_to_idx} \n {test_data.class_to_idx}")

{'fresh': 0, 'rotten': 1} 
 {'fresh': 0, 'rotten': 1}


In [14]:
train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=10)
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
val_dataloader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

# CUDA optimizations

In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if torch.cuda.is_available():
  torch.backends.cuda.matmul.allow_tf32 = True
  torch.backends.cudnn.allow_tf32 = True
  torch.backends.cuda.enable_flash_sdp(True)
  torch.backends.cuda.enable_mem_efficient_sdp(True)
  torch.backends.cuda.enable_math_sdp(True)

# Depthwise Seperable Convolution implementation

In [16]:
"""
Helper class for FreshNET. This is the implementation of the "Depthwise seperable convolution" as outlined in the MobileNET paper. Instances of this class are stacked
together and each stack constitutes a hidden layer in the NN. A DepthwiseSeperableConvolution instance is structured as follows:

- Pointwise convolution (applied along all channels at a single pixel) for channel expansion.
- Depthwise convolution (applies across each channel individually) for the model to efficiently learn features in a high dimensional space.
- Pointwise convolution to compress channels slightly.

In between the convolutional layers are batch normalizations and after the final layer is an activation function.
"""

class DepthwiseSeperableConvolution(nn.Module):
  def __init__(self, in_channels, out_channels, exp_factor, downsample_factor, kernel_size=3):
    super().__init__()

    self.in_channels = in_channels
    self.out_channels = out_channels
    self.exp_factor = exp_factor
    self.downsample_factor = downsample_factor
    self.kernel_size = kernel_size

    out = self.in_channels * self.exp_factor

    self.preserve = self.in_channels == self.out_channels
    """
        batchnorm normalizes inputs which has shown to improve training. It's typically applied after the convolutional layer output and before the activation function
        The MobileNET paper also mentions, with the exception of the final fully connected linear layer, "all layers are followed by a batchnorm and ReLU nonlinearity"
    """

    self.block = nn.Sequential(
        nn.Conv2d(in_channels=self.in_channels, out_channels=out, kernel_size=1, stride=1, groups=1), # expansion
        nn.BatchNorm2d(out),

        # adding padding because the image downsamples, but skip connections require same size
        nn.Conv2d(in_channels=out, out_channels=out, kernel_size=self.kernel_size, padding=self.kernel_size // 2, stride=self.downsample_factor, groups=out), # depthwise
        nn.BatchNorm2d(out),
        nn.Conv2d(in_channels=out, out_channels=self.out_channels, kernel_size=1, stride=1, groups=1), # pointwise
        nn.BatchNorm2d(self.out_channels),

        nn.ReLU6(), # output ∈ [0, 6] ==> more efficient than regular ReLU
    )

    self.depthwise = self.block[2] # storing a reference to the depthwise layer since qconfig must be disabled for this particular layer

  def forward(self, x):
    x = self.block(x)
    return x

# Skip connection

In [17]:
# a simple skip connection implementation

"""
FreshNET only applies skip connections between DepthwiseSeperableConvolution instances which preserve channel dimension in order to adhere to the laws of vector addition.
"""
class SkipConnection(nn.Module):
  def __init__(self, seq): # seq is an nn.Sequential instance
    super().__init__()

    self.seq = seq

    for module in seq:
      if not hasattr(module, 'preserve'):
        raise Exception("Cannot apply skip connection between layers of different channel dimensions")

    self.seq = seq

  def forward(self, x):
    self.gradient = x

    for depthwise_sep_conv in self.seq:
      x = depthwise_sep_conv(x)

    return x + self.gradient

# FreshNet: A MobileNET adaptation

In [18]:
# A MobileNET adaptation
class FreshNET(nn.Module):

  """
  FreshNET applies the concept of depthwise seperable convolutions as mentioned in the MobileNet paper, but the image dimensions are slightly increased
  to better suit the dataset and leverage higher image quality. Specifically, FreshNET decouples spatial filtering and channel mixing,
  the two operations traditionally combined in standard convolutions, and performs them as separate, more efficient operations. As the authors of MobileNet noted,
  it "drastically reduc[es] computation and model size". Despite this computational efficiency, accuracy is largely preserved -- the original MobileNet paper reports
  only about a 1% drop in accuracy compared to standard convolutions.

  This separation is implemented in DepthwiseSeperableConvolution class. Each instance consists of:
  - A pointwise 1*1 convolution for channel expansion, increasing feature dimensionality.
  - A depthwise 3*3 convolution applied independently per channel for spatial filtering.
  - Another pointwise convolution to project back to a lower-dimensional space.

  This design significantly reduces computational cost while maintaining high representational power. There are 10 layers, of which 7 are DepthwiseSeperableConvolution layers.
  Each DepthwiseSeperableConvolution layer consists of 1-4 DepthwiseSeperableConvolution instances. Within the DepthwiseSeperableConvolution layer, there are non-linear activation functions.

  Skip connections are also applied where possible to improve gradient flow and preserve fine-grained information. For a task like fresh/rotten classification,
  fine-grained information can be very valuable.
  """

  def __init__(self):
    super().__init__()

    # QuantStub and DeQuantStub are used to go from FL32 <===> quantized
    self.quant = QuantStub()
    self.dequant = DeQuantStub()

    # SiLU is good for vanishing gradients and can be applied in the later layers where ReLU might not be as effective (dead neurons)
    silu = nn.SiLU()

    # Regularization
    """
    The original paper notes "we use less regularization and data augmentation techniques because small models have less trouble with overfitting".
    This is why I chose to use dropout with 15% probability since that's on the lower end of the probability spectrum.
    """
    dropout_mild = nn.Dropout(MILD_DROPOUT_RATE)

    # initial regular convolution
    initial_conv = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=2) # 320*320*3 --> 160*160*32

    # inverted residual block 1: 160*160*32 --> 160*160*16, channel expansion = 1
    block_1 = nn.Sequential(
        DepthwiseSeperableConvolution(32, 16, 1, 1)
    )

    # inverted residual block 2: 160*160*16 --> 80*80*24, channel expansion = 6
    block_2 = nn.Sequential(
        DepthwiseSeperableConvolution(16, 24, 6, 2),
        DepthwiseSeperableConvolution(24, 24, 6, 1)
    )

    # inverted residual block 3: 80*80*24 --> 40*40*32, channel expansion = 6
    block_3 = nn.Sequential(
        DepthwiseSeperableConvolution(24, 32, 6, 2),
        SkipConnection(nn.Sequential(
              DepthwiseSeperableConvolution(32, 32, 6, 1),
              DepthwiseSeperableConvolution(32, 32, 6, 1),
          )
        )
    )

    # inverted residual block 4: 40*40*32 --> 20*20*64, channel expansion = 6
    block_4 = nn.Sequential(
        DepthwiseSeperableConvolution(32, 64, 6, 2),
        SkipConnection(nn.Sequential(
              DepthwiseSeperableConvolution(64, 64, 6, 1),
              DepthwiseSeperableConvolution(64, 64, 6, 1),
              DepthwiseSeperableConvolution(64, 64, 6, 1),
            )
        )
    )

    # inverted residual block 5: 20*20*64 --> 20*20*96, channel expansion = 6
    block_5 = nn.Sequential(
        DepthwiseSeperableConvolution(64, 96, 6, 1),
        SkipConnection(nn.Sequential(
              DepthwiseSeperableConvolution(96, 96, 6, 1),
              DepthwiseSeperableConvolution(96, 96, 6, 1),
            )
        )
    )

    # inverted residual block 6: 20*20*96 --> 10*10*160, channel expansion = 6
    block_6 = nn.Sequential(
        DepthwiseSeperableConvolution(96, 160, 6, 2),
        SkipConnection(nn.Sequential(
              DepthwiseSeperableConvolution(160, 160, 6, 1),
              DepthwiseSeperableConvolution(160, 160, 6, 1),
            )
        )
    )

    # inverted residual block 7: 10*10*160 --> 10*10*320, channel expansion = 6
    block_7 = nn.Sequential(
        DepthwiseSeperableConvolution(160, 320, 6, 1)
    )

    # final regular convolution
    final_conv = nn.Conv2d(in_channels=320, out_channels=1280, kernel_size=1, stride=1) # 10*10*320 --> 10*10*1280

    """
      MaxPooling is often preferred for highlighting dominant features, but it can be too aggressive in lightweight models
      like MobileNet or EfficientNet, which already have low spatial resolution. It can discard valuable spatial
      information. Average Pooling computes the average of nearby pixels, preserving more context. This makes it more
      suitable for compact architectures like this one.
    """

    avg_pool = nn.AvgPool2d(10) # 10*10*1280 --> 1*1*1280
    flatten = nn.Flatten() # fc layers expect a single tensor not a feature map

    # fully connected layers
    """
    The MobileNET paper mapped from 1280 directly to 1000 since it was being trained on ImageNET. My dataset, however, only has 2
    output classes: fresh (0) or rotten (1). Mapping directly from 1280 to 2 is an abrupt jump which could limit the model's ability to
    distinguish between classes, so I introduced an additional layer to map from 1280 to 500 then applied nonlinearity and
    mild dropout before going from 500 to 2.
    """
    fc_1 = nn.Linear(in_features=1280, out_features=500)
    fc_2 = nn.Linear(in_features=500, out_features=2)

    """
    The sequence of layers is as follows:

      - intial regular convolution
      - 4 stacks of DepthwiseSeperableConvolution
      - mild dropout for regularization
      - 3 more stacks of DepthwiseSeperableConvolution
      - 1 final regular convolution
      - average pooling
      - activation function
      - first fully connected linear layer
      - activation function
      - mild dropout for regularization
      - final fully connected linear layer

    There are skip connections in blocks 3-6.
    """

    self.layers = nn.Sequential(
        initial_conv,

        block_1,
        block_2,
        block_3,
        block_4,
        dropout_mild,

        block_5,
        block_6,
        block_7,

        final_conv,
        avg_pool,
        flatten,
        silu,
        dropout_mild,

        fc_1,
        silu,

        fc_2
    )

  # fusing conv + bn + relu improves speed and efficiency during QAT
  def fuse(self):
    for module_name, module in self.named_children(): # iterating over the instances defined in __init__ of this class
      if "block" in module_name: # accessing just the block_n instances
        for submodule_name, submodule in module.named_children(): # iterating over the anonymous objects inside the sequential object
          for block_name, block in submodule.named_children(): # iterating over the anonymous objects inside DepthwiseSeperableConvolutions
            if "block" in block_name: # accessing just the sequential of DepthwiseSeperableConvolutions
                fuse_modules(block, [["0", "1"], ["2", "3"], ["4", "5", "6"]], inplace=True) # fusing the conv, bn, and relu layers of DepthwiseSeperableConvolutions

  # excluding layers that can't be quantized e.g. depthwise layers bc groups != 1
  def exclude(self):
    for module_name, module in self.named_children(): # iterating over the instances defined in __init__ of this class
      if "block" in module_name: # accessing just the block_n instances
        for submodule_name, submodule in module.named_children(): # iterating over the anonymous objects inside the sequential object
          for block_name, block in submodule.named_children(): # iterating over the anonymous objects inside DepthwiseSeperableConvolutions
            if "block" in block_name: # accessing just the sequential of DepthwiseSeperableConvolutions
                block[2].qconfig = None # groups != 1 for depthwise convs so they're not involved in quantization


  def forward(self, x):
    x = self.quant(x)

    for layer in self.layers:
      x = layer(x)

    x = self.dequant(x)

    return x

# Setup

In [19]:
cnn = FreshNET()
cnn.to(device)

loss_fn = nn.CrossEntropyLoss() # std loss function for classification

optimizer = optim.Adam(cnn.parameters()) # RMSProp is pretty sensitive to changes in LR so i wanted to try Adam

scaler = torch.amp.GradScaler()

"""
Not mentioned in the original paper, but this scheduler was used because linear warmup reduces volatility in the earlier epochs.
Cosine annealing can improve training stability and convergence and declining LR steadily means the model will rely more on the features
it learns early on -- the key differentiators -- rather than picking up potential noise and overfitting.
"""
scheduler = LinearWarmupCosineAnnealingLR(optimizer, warmup_epochs=WARMUP_DUR, eta_min=LR_MIN, max_epochs=EPOCHS)

  scheduler = LinearWarmupCosineAnnealingLR(optimizer, warmup_epochs=WARMUP_DUR, eta_min=LR_MIN, max_epochs=EPOCHS)


# Train and validate

In [20]:
def train(cnn, epochs=EPOCHS, scaler=None):
  cnn.train()

  for i in range(EPOCHS):
    torch.cuda.empty_cache()
    correct, total = 0, 0
    running_loss = 0

    cnn.train(True)

    loop = tqdm(train_dataloader, desc=f"Epoch {i+1}/{EPOCHS}", leave=True, disable=False)

    for j, (input, labels) in enumerate(loop, 1):
      input, labels = input.to(device), labels.to(device)
      optimizer.zero_grad()

      output = cnn(input)
      loss = loss_fn(output, labels)
      running_loss += loss.item()

      if scaler:
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
      else:
        loss.backward()
        optimizer.step()

      avg_loss = running_loss / j
      loop.set_postfix(loss=avg_loss)

      pred = torch.argmax(output, dim=1)

      total += labels.size(0)
      correct += (pred == labels).sum().item()


    cnn.eval()
    val_total, val_correct = 0, 0

    with torch.no_grad():
      for image, label in val_dataloader:
        image, label = image.to(device), label.to(device)
        output = cnn(image)

        pred = torch.argmax(output, dim=1)
        val_total += label.size(0)
        val_correct += (pred == label).sum().item()

      val_accuracy = val_correct / val_total
      accuracy = correct / total


    scheduler.step()

    current_lr = optimizer.param_groups[0]['lr']
    print(f"Epoch {i + 1}: LR={current_lr:.6f} \t Train Acc: {accuracy:.4f} \t Val Acc: {val_accuracy:.4f}")

In [21]:
cnn.to("cuda")
compiled_cnn = torch.compile(cnn)

with torch.autocast(device_type='cuda', dtype=torch.float16):
  train(compiled_cnn, scaler=scaler)

Epoch 1/30: 100%|██████████| 74/74 [03:36<00:00,  2.92s/it, loss=0.692]


Epoch 1: LR=0.000111 	 Train Acc: 0.5250 	 Val Acc: 0.5289


Epoch 2/30: 100%|██████████| 74/74 [00:28<00:00,  2.60it/s, loss=0.416]


Epoch 2: LR=0.000222 	 Train Acc: 0.8011 	 Val Acc: 0.8562


Epoch 3/30: 100%|██████████| 74/74 [00:28<00:00,  2.61it/s, loss=0.286]


Epoch 3: LR=0.000333 	 Train Acc: 0.8826 	 Val Acc: 0.8850


Epoch 4/30: 100%|██████████| 74/74 [00:28<00:00,  2.60it/s, loss=0.252]


Epoch 4: LR=0.000444 	 Train Acc: 0.8984 	 Val Acc: 0.9071


Epoch 5/30: 100%|██████████| 74/74 [00:28<00:00,  2.63it/s, loss=0.221]


Epoch 5: LR=0.000556 	 Train Acc: 0.9123 	 Val Acc: 0.9011


Epoch 6/30: 100%|██████████| 74/74 [00:28<00:00,  2.60it/s, loss=0.204]


Epoch 6: LR=0.000667 	 Train Acc: 0.9170 	 Val Acc: 0.8829


Epoch 7/30: 100%|██████████| 74/74 [00:28<00:00,  2.59it/s, loss=0.192]


Epoch 7: LR=0.000778 	 Train Acc: 0.9227 	 Val Acc: 0.9138


Epoch 8/30: 100%|██████████| 74/74 [00:28<00:00,  2.57it/s, loss=0.17]


Epoch 8: LR=0.000889 	 Train Acc: 0.9307 	 Val Acc: 0.8833


Epoch 9/30: 100%|██████████| 74/74 [00:28<00:00,  2.61it/s, loss=0.167]


Epoch 9: LR=0.001000 	 Train Acc: 0.9320 	 Val Acc: 0.9405


Epoch 10/30: 100%|██████████| 74/74 [00:28<00:00,  2.60it/s, loss=0.148]


Epoch 10: LR=0.001000 	 Train Acc: 0.9393 	 Val Acc: 0.9115


Epoch 11/30: 100%|██████████| 74/74 [00:28<00:00,  2.60it/s, loss=0.139]


Epoch 11: LR=0.000994 	 Train Acc: 0.9432 	 Val Acc: 0.9519


Epoch 12/30: 100%|██████████| 74/74 [00:28<00:00,  2.58it/s, loss=0.114]


Epoch 12: LR=0.000978 	 Train Acc: 0.9532 	 Val Acc: 0.9439


Epoch 13/30: 100%|██████████| 74/74 [00:28<00:00,  2.63it/s, loss=0.116]


Epoch 13: LR=0.000951 	 Train Acc: 0.9525 	 Val Acc: 0.9280


Epoch 14/30: 100%|██████████| 74/74 [00:28<00:00,  2.60it/s, loss=0.0962]


Epoch 14: LR=0.000914 	 Train Acc: 0.9616 	 Val Acc: 0.9608


Epoch 15/30: 100%|██████████| 74/74 [00:28<00:00,  2.59it/s, loss=0.0862]


Epoch 15: LR=0.000868 	 Train Acc: 0.9654 	 Val Acc: 0.9593


Epoch 16/30: 100%|██████████| 74/74 [00:28<00:00,  2.60it/s, loss=0.0845]


Epoch 16: LR=0.000815 	 Train Acc: 0.9675 	 Val Acc: 0.9638


Epoch 17/30: 100%|██████████| 74/74 [00:28<00:00,  2.59it/s, loss=0.0832]


Epoch 17: LR=0.000754 	 Train Acc: 0.9679 	 Val Acc: 0.9564


Epoch 18/30: 100%|██████████| 74/74 [00:28<00:00,  2.59it/s, loss=0.0803]


Epoch 18: LR=0.000689 	 Train Acc: 0.9684 	 Val Acc: 0.9723


Epoch 19/30: 100%|██████████| 74/74 [00:28<00:00,  2.61it/s, loss=0.071]


Epoch 19: LR=0.000620 	 Train Acc: 0.9722 	 Val Acc: 0.9308


Epoch 20/30: 100%|██████████| 74/74 [00:28<00:00,  2.57it/s, loss=0.0579]


Epoch 20: LR=0.000550 	 Train Acc: 0.9771 	 Val Acc: 0.9763


Epoch 21/30: 100%|██████████| 74/74 [00:28<00:00,  2.60it/s, loss=0.0619]


Epoch 21: LR=0.000480 	 Train Acc: 0.9767 	 Val Acc: 0.9729


Epoch 22/30: 100%|██████████| 74/74 [00:28<00:00,  2.59it/s, loss=0.0538]


Epoch 22: LR=0.000411 	 Train Acc: 0.9789 	 Val Acc: 0.9754


Epoch 23/30: 100%|██████████| 74/74 [00:28<00:00,  2.61it/s, loss=0.0457]


Epoch 23: LR=0.000346 	 Train Acc: 0.9827 	 Val Acc: 0.9824


Epoch 24/30: 100%|██████████| 74/74 [00:27<00:00,  2.66it/s, loss=0.0446]


Epoch 24: LR=0.000285 	 Train Acc: 0.9827 	 Val Acc: 0.9828


Epoch 25/30: 100%|██████████| 74/74 [00:28<00:00,  2.61it/s, loss=0.043]


Epoch 25: LR=0.000232 	 Train Acc: 0.9833 	 Val Acc: 0.9822


Epoch 26/30: 100%|██████████| 74/74 [00:28<00:00,  2.64it/s, loss=0.0379]


Epoch 26: LR=0.000186 	 Train Acc: 0.9855 	 Val Acc: 0.9814


Epoch 27/30: 100%|██████████| 74/74 [00:28<00:00,  2.59it/s, loss=0.0332]


Epoch 27: LR=0.000149 	 Train Acc: 0.9874 	 Val Acc: 0.9860


Epoch 28/30: 100%|██████████| 74/74 [00:28<00:00,  2.59it/s, loss=0.0334]


Epoch 28: LR=0.000122 	 Train Acc: 0.9873 	 Val Acc: 0.9903


Epoch 29/30: 100%|██████████| 74/74 [00:28<00:00,  2.61it/s, loss=0.032]


Epoch 29: LR=0.000106 	 Train Acc: 0.9883 	 Val Acc: 0.9877


Epoch 30/30: 100%|██████████| 74/74 [00:28<00:00,  2.59it/s, loss=0.0289]


Epoch 30: LR=0.000100 	 Train Acc: 0.9891 	 Val Acc: 0.9875


# Saving model weights

In [22]:
cnn.to("cpu")
torch.save(cnn.state_dict(), "/content/modelweight.pth")

from google.colab import files
files.download(r"/content/modelweight.pth")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Loading saved model

In [37]:
from google.colab import files
#files.upload()
weights = torch.load('/content/modelweight.pth', map_location=torch.device('cpu'))
cnn = FreshNET()
cnn.load_state_dict(weights)

<All keys matched successfully>

# Model size (no quantization)

In [39]:
param_size = 0

for param in cnn.parameters():
    param_size += param.nelement() * param.element_size()

buffer_size = 0

for buffer in cnn.buffers():
    buffer_size += buffer.nelement() * buffer.element_size()

size_all_mb = (param_size + buffer_size) / 1024**2
print('model size: {:.3f}MB'.format(size_all_mb))

model size: 11.111MB


# Final test accuracy (no quantization)

In [40]:
def test(test_dataloader, cnn):
  with torch.no_grad():
    cnn.eval()
    accuracy = 0
    total = 0

    for batch in test_dataloader:
      images, labels = batch
      images = images.to(device)
      labels = labels.to(device)

      predicted = torch.argmax(cnn(images), dim=1)
      accuracy += (predicted == labels).sum().item()
      total += labels.size(0)

    avg_acc = accuracy / total

    return avg_acc

In [41]:
cnn.to("cuda")
cnn.eval()

with torch.autocast(device_type='cuda', dtype=torch.float16):
  test_acc = test(test_dataloader, cnn)
  print(f"Final test accuracy: {100 * test_acc:.2f}%")

Final test accuracy: 97.61%


# Quantizing model

Quantization Aware Training (QAT) involves training the model with full floating point precision, float32, while simulating the effects of int8 quantization. This 'fake' quantization makes the model well-equipped to perform under quantization, maintaining accuracy better than if it used post-training  quantization instead.


In [42]:
cnn.to("cpu") # model must be on cpu while fusing
cnn.train()
cnn.fuse() # custom fuse method fuses the DepthwiseSeperableConvolution layers like conv2d, batchnorm, and relu
config = get_default_qat_qconfig("qnnpack") # for ARM-based architecture, qnnpack is the correct config mode
cnn.qconfig = config
cnn.exclude() # excluding problematic layers for quantization
prepare_qat(cnn, inplace=True) # preparing the model for QAT

FreshNET(
  (quant): QuantStub(
    (activation_post_process): FusedMovingAvgObsFakeQuantize(
      fake_quant_enabled=tensor([1]), observer_enabled=tensor([1]), scale=tensor([1.]), zero_point=tensor([0], dtype=torch.int32), dtype=torch.quint8, quant_min=0, quant_max=255, qscheme=torch.per_tensor_affine, reduce_range=False
      (activation_post_process): MovingAverageMinMaxObserver(min_val=inf, max_val=-inf)
    )
  )
  (dequant): DeQuantStub()
  (layers): Sequential(
    (0): Conv2d(
      3, 32, kernel_size=(3, 3), stride=(2, 2)
      (weight_fake_quant): FusedMovingAvgObsFakeQuantize(
        fake_quant_enabled=tensor([1]), observer_enabled=tensor([1]), scale=tensor([1.]), zero_point=tensor([0], dtype=torch.int32), dtype=torch.qint8, quant_min=-128, quant_max=127, qscheme=torch.per_tensor_symmetric, reduce_range=False
        (activation_post_process): MovingAverageMinMaxObserver(min_val=inf, max_val=-inf)
      )
      (activation_post_process): FusedMovingAvgObsFakeQuantize(
    

In [43]:
cnn.to("cpu") # need to switch back to cpu before quantizing
cnn.eval()
quantized_cnn = convert(cnn, inplace=False) # quantizing cnn



# Model size (with quantization)


In [44]:
param_size = 0

for param in quantized_cnn.parameters():
    param_size += param.nelement() * param.element_size()

buffer_size = 0

for buffer in quantized_cnn.buffers():
    buffer_size += buffer.nelement() * buffer.element_size()

size_all_mb = (param_size + buffer_size) / 1024**2
print('model size: {:.3f}MB'.format(size_all_mb))

model size: 0.242MB


# Final test accuracy (with quantization)

In [None]:
# session keeps crashing here -- not sure why

quantized_cnn.to("cuda")
test_acc = test(test_dataloader, quantized_cnn)
print(f"Final test accuracy of quantized model: {100 * test_acc:.2f}%")