In [None]:
# In[1]: Imports & Device
import copy
import os
import tempfile
import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from torch.quantization import quantize_dynamic

# Prefer CUDA when it is usable; otherwise everything runs on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)


Using device: cuda


# **Data transforms & DataLoaders for GTSRB**

In [None]:
# In[2]: Data transforms & DataLoaders for GTSRB
# GTSRB images are RGB. Each one is forced to exactly 224×224, converted to a
# tensor, and normalized with the ImageNet channel statistics.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
    ]
)

# Train and test splits share the same root directory and preprocessing;
# download=True is passed for both splits.
dataset_kwargs = dict(root="./data", transform=transform, download=True)
train_dataset = datasets.GTSRB(split="train", **dataset_kwargs)
test_dataset = datasets.GTSRB(split="test", **dataset_kwargs)

# Only the training loader shuffles; worker count and pinned memory are shared.
loader_kwargs = dict(num_workers=2, pin_memory=True)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, **loader_kwargs)

print(f"Train samples: {len(train_dataset)}, Test samples: {len(test_dataset)}")


100%|██████████| 187M/187M [00:13<00:00, 14.3MB/s]
100%|██████████| 89.0M/89.0M [00:06<00:00, 13.2MB/s]
100%|██████████| 99.6k/99.6k [00:00<00:00, 195kB/s]

Train samples: 26640, Test samples: 12630





# ResNet-18

In [None]:
# In[3]: Build & adapt pre-trained ResNet-18
# GTSRB has 43 traffic-sign classes.
num_classes = 43

# Start from ResNet-18 with its default pre-trained weights.
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Swap the final fully connected layer for one with `num_classes` outputs,
# keeping its input width, then move the network to the selected device.
fc_in_features = model.fc.in_features
model.fc = nn.Linear(fc_in_features, num_classes)
model = model.to(device)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 182MB/s]


In [None]:
# In[4]: Accuracy helper
@torch.no_grad()
def evaluate(net, loader, device):
    """Return the top-1 accuracy of ``net`` over ``loader`` as a percentage.

    Args:
        net: Module called as ``net(X)``; its output is arg-maxed over ``dim=1``
            and compared with the labels.
        loader: Iterable yielding ``(X, y)`` batches of inputs and labels.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``100 * correct / total`` as a float.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    net.eval()  # evaluation mode; the previous mode is not restored afterwards
    correct = total = 0
    for X, y in loader:
        X, y = X.to(device), y.to(device)
        preds = net(X).argmax(dim=1)
        correct += (preds == y).sum().item()
        total += y.size(0)
    if total == 0:
        # Fail with a clear message instead of a bare ZeroDivisionError.
        raise ValueError("evaluate() received a loader that yielded no samples")
    return 100 * correct / total


In [None]:
# In[5]: Baseline accuracy (before any fine-tuning)
# Reference point: score the network on the test split before any training step.
baseline_acc = evaluate(net=model, loader=test_loader, device=device)
print(f"1) Test accuracy before fine-tuning: {baseline_acc:5.2f}%")


1) Test accuracy before fine-tuning:  3.26%


# Fine-tuning

In [None]:
# In[6]: Fine-tune for 1 epoch
# Cross-entropy objective; Adam updates every parameter of the network.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# One pass over the training loader, accumulating the per-batch loss.
model.train()
running_loss = 0.0
for images, labels in train_loader:
    images = images.to(device)
    labels = labels.to(device)

    optimizer.zero_grad()
    batch_loss = criterion(model(images), labels)
    batch_loss.backward()
    optimizer.step()

    running_loss += batch_loss.item()

# The reported loss is the mean over batches, not over individual samples.
print(f"Training loss (1 epoch): {running_loss/len(train_loader):.4f}")

finetuned_acc = evaluate(model, test_loader, device)
print(f"2) Test accuracy after fine-tuning (pre-quant): {finetuned_acc:5.2f}%")


Training loss (1 epoch): 0.2551
2) Test accuracy after fine-tuning (pre-quant): 97.98%


In [None]:
# In[7]: Save fine-tuned model
# Output directory (presumably Kaggle's writable working directory). Create it
# if it is missing so the save below does not fail where it does not exist yet.
save_dir = "/kaggle/working"
os.makedirs(save_dir, exist_ok=True)
prequant_path = os.path.join(save_dir, "resnet18_gtsrb_finetuned.pth")
# Only the state_dict is written, not the module object itself.
torch.save(model.state_dict(), prequant_path)
print(f"Saved fine-tuned model to {prequant_path}")


Saved fine-tuned model to /kaggle/working/resnet18_gtsrb_finetuned.pth


# Model size before quantization

In [None]:
# In[8]: Model size before quantization
def model_size_mb(path):
    """Return the on-disk size of the file at ``path`` in megabytes (bytes / 1e6)."""
    size_in_bytes = os.path.getsize(path)
    return size_in_bytes / 1e6

# Size on disk of the FP32 checkpoint written by the save cell.
size_fp32 = model_size_mb(path=prequant_path)
print(f"3) Model size before quantization: {size_fp32:.2f} MB")


3) Model size before quantization: 44.88 MB


# Dynamic quantization

In [None]:
# In[9]: Dynamic quantization (INT8)
# Only nn.Linear modules are in the quantization set, so the `fc` head is
# converted to INT8 while the Conv2d layers stay in FP32.
#
# Quantize from a deep copy: nn.Module.to() moves a module in place, so calling
# it on `model` directly would also pull the fine-tuned model off `device` and
# break any earlier cell that is re-run against `device`.
model_cpu = copy.deepcopy(model).to("cpu")
quantized_model = quantize_dynamic(
    model_cpu, {nn.Linear}, dtype=torch.qint8
)
quantized_model.eval()


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
# In[10]: Save quantized model
# Write the quantized model's state_dict into the same directory as the FP32 checkpoint.
quant_filename = "resnet18_gtsrb_quantized.pth"
quant_path = os.path.join(save_dir, quant_filename)
quantized_state = quantized_model.state_dict()
torch.save(quantized_state, quant_path)
print(f"Saved quantized model to {quant_path}")


Saved quantized model to /kaggle/working/resnet18_gtsrb_quantized.pth


# Evaluate quantized accuracy

In [None]:
# In[11]: Evaluate quantized accuracy
# The quantized model is evaluated with batches kept on the CPU.
int8_acc = evaluate(quantized_model, test_loader, device="cpu")
print(f"4) Test accuracy after quantization: {int8_acc:5.2f}%")
# Positive values mean the quantized model scored lower than the FP32 one.
print(f"5) Accuracy drop: {finetuned_acc - int8_acc:5.2f} percentage points")


4) Test accuracy after quantization: 97.97%
5) Accuracy drop:  0.01 percentage points


# Model size after quantization

In [None]:
# In[12]: Model size after quantization
# Size on disk of the quantized checkpoint, and the saving relative to FP32.
size_int8 = model_size_mb(path=quant_path)
print(f"6) Model size after INT8 quantization: {size_int8:.2f} MB")
# The percentage compares checkpoint file sizes, not runtime memory use.
print(f"7) Memory saving: {(1 - size_int8/size_fp32)*100:4.1f}%")


6) Model size after INT8 quantization: 44.81 MB
7) Memory saving:  0.1%


In [None]:
# In[13]: Inference latency benchmarking
def measure_latency(net, loader, device, num_batches=10):
    """Return the average forward-pass time of ``net`` in milliseconds per image.

    Only the ``net(X)`` call is timed; moving the batch to ``device`` is not.

    Args:
        net: Module to benchmark. It is switched to eval mode and moved to
            ``device`` (both are side effects on the caller's object).
        loader: Iterable yielding ``(X, _)`` batches; the second item is ignored.
        device: Device (string or ``torch.device``) to run on.
        num_batches: Maximum number of batches to time.

    Returns:
        Total timed seconds divided by the number of images, times 1000.

    Raises:
        ValueError: If no images were timed (empty loader or ``num_batches <= 0``).
    """
    net.eval().to(device)
    target = torch.device(device)
    on_cuda = target.type == "cuda"
    total_time = 0.0
    total_images = 0
    with torch.no_grad():
        for i, (X, _) in enumerate(loader):
            if i >= num_batches:
                break
            X = X.to(device)
            if on_cuda:
                # CUDA kernels launch asynchronously; drain pending work so the
                # timer starts from an idle device.
                torch.cuda.synchronize(target)
            start = time.perf_counter()
            _ = net(X)
            if on_cuda:
                # Wait for the forward pass to actually finish before stopping
                # the clock; otherwise only the launch overhead is measured.
                torch.cuda.synchronize(target)
            end = time.perf_counter()
            total_time += (end - start)
            total_images += X.size(0)
    if total_images == 0:
        # Fail with a clear message instead of a bare ZeroDivisionError.
        raise ValueError("measure_latency() timed no images; check loader and num_batches")
    return (total_time / total_images) * 1000  # ms per image

# Time both models on the CPU over the leading batches of the test loader.
fp32_latency = measure_latency(model_cpu, test_loader, device="cpu")
int8_latency = measure_latency(quantized_model, test_loader, device="cpu")

print(f"8) Avg inference latency (FP32): {fp32_latency:5.2f} ms/image")
print(f"   Avg inference latency (INT8): {int8_latency:5.2f} ms/image")


8) Avg inference latency (FP32): 35.31 ms/image
   Avg inference latency (INT8): 36.12 ms/image


## Observations / Key Takeaways


1) Test accuracy before fine-tuning:  3.26%

2) Test accuracy after fine-tuning (pre-quant): 97.98%

3) Model size before quantization: 44.88 MB

4) Test accuracy after quantization: 97.97%


5) Accuracy drop:  0.01 percentage points

6) Model size after INT8 quantization: 44.81 MB

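7) Memory saving:  0.1%

8) Avg inference latency: 35.31 ms/image (FP32) vs. 36.12 ms/image (INT8)

**Why the savings are negligible:** dynamic quantization was applied only to `nn.Linear` modules, and the only such module in this ResNet-18 is the final classification layer. The convolutional layers, which hold almost all of the parameters, remain FP32, so model size, accuracy, and CPU latency are essentially unchanged.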
