<a href="https://colab.research.google.com/github/wileyw/DeepLearningDemos/blob/master/Quantization/Quantization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import glob
from itertools import chain
import os
import random
import zipfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
from tqdm.notebook import tqdm


# Report the installed PyTorch version so the recorded results can be tied to it.
print(f"Torch: {torch.__version__}")

# Training settings
batch_size = 64  # samples per batch for the DataLoaders built further down
epochs = 20  # NOTE(review): not used in the visible cells; presumably the number of training epochs
lr = 3e-5  # NOTE(review): not used in the visible cells; presumably the optimizer learning rate
gamma = 0.7  # NOTE(review): not used in the visible cells; presumably the StepLR decay factor (confirm)
seed = 42  # passed to seed_everything() and to train_test_split(random_state=...)

def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Sets PYTHONHASHSEED in the environment, seeds Python's ``random``,
    NumPy, and PyTorch (CPU and all CUDA devices), and asks cuDNN to use
    deterministic algorithms.

    Args:
        seed: Integer seed applied to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Same generators, same order as a hand-written sequence of calls.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    torch.backends.cudnn.deterministic = True

seed_everything(seed)

# Use the GPU when one is present, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Necessary to setup quantization.
# The QAT qconfig and the quantized engine must name the same backend, so the
# backend string is defined once and reused for both.
quantized_backend = 'qnnpack'
qconfig = torch.quantization.get_default_qat_qconfig(quantized_backend)
torch.backends.quantized.engine = quantized_backend

Torch: 1.9.0+cu111


In [20]:
# NOTE(review): presumably the extra dependencies for the
# huggingface/pytorch-transformers hub model loaded in the dynamic-quantization
# section; confirm. The saved execution count (In [20]) shows this cell was
# run after later cells, not in the position it occupies here.
!pip install tqdm boto3 requests regex sentencepiece sacremoses

Collecting boto3
  Downloading boto3-1.18.61-py3-none-any.whl (131 kB)
[K     |████████████████████████████████| 131 kB 5.4 MB/s 
Collecting sentencepiece
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 55.3 MB/s 
[?25hCollecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 53.3 MB/s 
[?25hCollecting botocore<1.22.0,>=1.21.61
  Downloading botocore-1.21.61-py3-none-any.whl (8.0 MB)
[K     |████████████████████████████████| 8.0 MB 47.6 MB/s 
[?25hCollecting s3transfer<0.6.0,>=0.5.0
  Downloading s3transfer-0.5.0-py3-none-any.whl (79 kB)
[K     |████████████████████████████████| 79 kB 6.9 MB/s 
[?25hCollecting jmespath<1.0.0,>=0.7.1
  Downloading jmespath-0.10.0-py2.py3-none-any.whl (24 kB)
Collecting urllib3<1.27,>=1.25.4
  Downloading urllib3-1.26.7-py2.py3-none-any.whl (138 kB)
[K     |████████████████

# Load Data & Augmentations

Download the Dogs vs. Cats dataset from Kaggle. When prompted, enter your Kaggle username and API key.

In [2]:
# Install the opendatasets helper; it has to be installed before it can be
# imported, which is why this import is not in the top import cell.
!pip install opendatasets
import opendatasets as od
# Fetch the competition archive. In the saved run this prompted interactively
# for a Kaggle username and key and extracted into
# ./dogs-vs-cats-redux-kernels-edition.
od.download("https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/data")

Collecting opendatasets
  Downloading opendatasets-0.1.20-py3-none-any.whl (14 kB)
Installing collected packages: opendatasets
Successfully installed opendatasets-0.1.20
Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: jinc2020
Your Kaggle Key: ··········
Downloading dogs-vs-cats-redux-kernels-edition.zip to ./dogs-vs-cats-redux-kernels-edition


100%|██████████| 814M/814M [00:04<00:00, 196MB/s]



Extracting archive ./dogs-vs-cats-redux-kernels-edition/dogs-vs-cats-redux-kernels-edition.zip to ./dogs-vs-cats-redux-kernels-edition


In [3]:
# Check which files the download step produced.
!ls dogs-vs-cats-redux-kernels-edition/

sample_submission.csv  test.zip  train.zip


In [4]:
# Unpack the competition archives into ./data and collect the image paths.
os.makedirs('data', exist_ok=True)
train_dir = 'data/train'
test_dir = 'data/test'

with zipfile.ZipFile('dogs-vs-cats-redux-kernels-edition/train.zip') as train_zip:
    train_zip.extractall('data')

with zipfile.ZipFile('dogs-vs-cats-redux-kernels-edition/test.zip') as test_zip:
    test_zip.extractall('data')

# glob returns paths in arbitrary, filesystem-dependent order. Sorting makes
# the seeded train/validation split below reproducible across machines.
train_list = sorted(glob.glob(os.path.join(train_dir, '*.jpg')))
test_list = sorted(glob.glob(os.path.join(test_dir, '*.jpg')))

print(f"Train Data: {len(train_list)}")
print(f"Test Data: {len(test_list)}")

# The class name is the part of the file name before the first dot.
# os.path.basename keeps this independent of the path separator.
labels = [os.path.basename(path).split('.')[0] for path in train_list]

Train Data: 25000
Test Data: 12500


In [5]:
# Hold out 20% of the labelled images for validation, keeping the class
# balance (stratified on `labels`) and fixing the shuffle with `seed`.
# NOTE: this rebinds `train_list` to the 80% portion, so the cell that builds
# `train_list` and `labels` must be re-run before this one is re-run.
train_list, valid_list = train_test_split(
    train_list,
    test_size=0.2,
    stratify=labels,
    random_state=seed,
)

for split_name, split_files in (
    ("Train", train_list),
    ("Validation", valid_list),
    ("Test", test_list),
):
    print(f"{split_name} Data: {len(split_files)}")

Train Data: 20000
Validation Data: 5000
Test Data: 12500


In [6]:
# Training pipeline: resize, then random crop and horizontal flip as data
# augmentation.
train_transforms = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
)

# Evaluation pipelines must be deterministic: random crops or flips would make
# validation and test metrics change from run to run, so only the resize and
# tensor conversion are kept.
val_transforms = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]
)


test_transforms = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]
)

In [7]:
class CatsDogsDataset(Dataset):
    """Image dataset whose binary label is derived from each file name.

    A file whose name starts with ``dog.`` gets label 1; every other file
    (including unlabelled test images such as ``7.jpg``) gets label 0.

    Args:
        file_list: Sequence of image file paths.
        transform: Optional callable applied to the loaded RGB PIL image.
            When ``None`` the PIL image is returned unchanged.
    """

    def __init__(self, file_list, transform=None):
        self.file_list = file_list
        self.transform = transform

    @property
    def filelength(self):
        """Number of files; kept for code that reads this attribute."""
        return len(self.file_list)

    def __len__(self):
        return len(self.file_list)

    @staticmethod
    def _label_from_path(img_path):
        """Return 1 if the file name's prefix is ``dog``, otherwise 0."""
        # basename instead of split("/") so the path separator does not matter.
        prefix = os.path.basename(img_path).split(".")[0]
        return 1 if prefix == "dog" else 0

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the file at position ``idx``."""
        img_path = self.file_list[idx]

        # Image.open is lazy and keeps the file handle open. Converting inside
        # the context manager loads the pixels and lets the handle be closed.
        # It also gives every sample three channels.
        with Image.open(img_path) as img:
            img = img.convert("RGB")

        # transform defaults to None, so only call it when one was supplied.
        if self.transform is not None:
            img = self.transform(img)

        return img, self._label_from_path(img_path)

# Each split gets its own transform pipeline; validation uses val_transforms.
train_data = CatsDogsDataset(train_list, transform=train_transforms)
valid_data = CatsDogsDataset(valid_list, transform=val_transforms)
test_data = CatsDogsDataset(test_list, transform=test_transforms)

# Only the training loader shuffles. The evaluation loaders keep file order so
# that metrics are repeatable and test predictions line up with test_list.
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(dataset=valid_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)

print(len(train_data), len(train_loader))
print(len(valid_data), len(valid_loader))

20000 313
5000 79


# Load Model

In [13]:
# Any of the torchvision ResNet depths can be swapped in here:
#   torchvision.models.resnet18(pretrained=True)
#   torchvision.models.resnet34(pretrained=True)
#   torchvision.models.resnet50(pretrained=True)
#   torchvision.models.resnet101(pretrained=True)
#   torchvision.models.resnet152(pretrained=True)
# The model is put in training mode; call .eval() instead for inference-only use.
model = torchvision.models.resnet101(pretrained=True).train()

Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth


  0%|          | 0.00/171M [00:00<?, ?B/s]

In [14]:
# Show the layer-by-layer structure of the float model.
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

# Setup Quantization

Quantization Implementations:

*   Fine-tune as-is & post-training static quantization
*   Quantization aware training
*   [Dynamic quantization](https://pytorch.org/tutorials/recipes/recipes/dynamic_quantization.html)


## 1. Dynamic Quantization

In [15]:
# Alias the fp32 network so the quantization cells read clearly. This is the
# same object as `model`, not a copy.
float_model = model

# Dynamic quantization with int8 weights. In the saved run this shrank the
# checkpoint only slightly (1.04x).
# NOTE(review): per the PyTorch docs the default targets are Linear and RNN
# layers, which would leave this network's convolutions in fp32; confirm.
dynamic_quantized_model = torch.quantization.quantize_dynamic(
    float_model,
    dtype=torch.qint8,
)

In [16]:
def print_size_of_model(model, label=""):
    """Print and return the on-disk size of a model's serialized state_dict.

    The state_dict is saved into a throwaway temporary directory, so nothing
    is written to (or left behind in) the working directory, even if saving
    raises.

    Args:
        model: Object exposing ``state_dict()`` (e.g. an ``nn.Module``).
        label: Text printed next to the size to identify the model.

    Returns:
        Size of the serialized state_dict in bytes.
    """
    import tempfile  # local import keeps this helper self-contained

    with tempfile.TemporaryDirectory() as scratch_dir:
        # Same file name as before, only relocated into the scratch directory.
        checkpoint_path = os.path.join(scratch_dir, "temp.p")
        torch.save(model.state_dict(), checkpoint_path)
        size = os.path.getsize(checkpoint_path)

    print("model: ",label,' \t','Size (KB):', size/1e3)
    return size

# Compare the serialized size of the fp32 model with its dynamically
# quantized counterpart.
f = print_size_of_model(float_model, "fp32")
q = print_size_of_model(dynamic_quantized_model, "int8")
print(f"{f / q:.2f} times smaller")

model:  fp32  	 Size (KB): 178814.045
model:  int8  	 Size (KB): 172670.809
1.04 times smaller


https://pytorch.org/docs/stable/quantization.html#torch.quantization.quantize_dynamic

"This is the simplest to apply form of quantization where the weights are quantized ahead of time but the activations are dynamically quantized during inference. This is used for situations where the model execution time is dominated by loading weights from memory rather than computing the matrix multiplications. This is true for LSTM and Transformer type models with small batch size."

In [12]:
# Inspect which layers were replaced by dynamic quantization.
# NOTE(review): the saved output below lists BasicBlock layers and an earlier
# execution count (In [12]), so it appears to come from a run with a different
# ResNet variant; re-run to refresh.
print(dynamic_quantized_model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [22]:
# Replace torch.hub's private fork-validation hook with a stub that always
# returns True.
# NOTE(review): this patches a private API (leading underscore); confirm it is
# still required for the torch version in use.
torch.hub._validate_not_a_forked_repo=lambda a,b,c: True
# Load bert-base-uncased through the huggingface/pytorch-transformers hub entry point.
# NOTE(review): the saved run of this cell ended in ModuleNotFoundError, so the
# statements below did not produce output in the recorded session.
float_transformer = torch.hub.load('huggingface/pytorch-transformers', 'model', 'bert-base-uncased', skip_validation=True)

# Dynamically quantize the transformer with int8 weights.
dq_transformer = torch.quantization.quantize_dynamic(float_transformer, dtype=torch.qint8)

# Compare serialized sizes of the float and quantized transformers.
f=print_size_of_model(float_transformer,"fp32")
q=print_size_of_model(dq_transformer,"int8")
print("{0:.2f} times smaller".format(f/q))

Using cache found in /root/.cache/torch/hub/huggingface_pytorch-transformers_master


ModuleNotFoundError: ignored

## 2. Post-training Static Quantization

## 3. Quantization Aware Training


In [31]:
# Fuse the stem conv/bn/relu. fuse_modules works on a copy by default, so the
# shared float_model is left untouched.
float_model_fused = torch.quantization.fuse_modules(float_model,
    [['conv1', 'bn1', 'relu']])

# Attach the QAT qconfig to the fused copy rather than to float_model, so that
# re-running earlier cells still sees a clean float model. The qconfig backend
# is taken from the active quantized engine so the two always agree.
float_model_fused.qconfig = torch.quantization.get_default_qat_qconfig(
    torch.backends.quantized.engine)
float_model_prepared = torch.quantization.prepare_qat(float_model_fused)

# NOTE: no fine-tuning happens between prepare_qat and convert, so the
# observers never see data and the converted layers carry default
# scale/zero_point (scale=1.0, zero_point=0 in the saved output). This cell
# only demonstrates the size reduction. Insert a training loop on
# float_model_prepared here for a usable int8 model.
# NOTE(review): the stock torchvision ResNet likely also needs quant/dequant
# stubs (see torchvision.models.quantization) before the converted model can
# run inference; confirm.
float_model_prepared.eval()  # convert expects the model in eval mode
model_int8 = torch.quantization.convert(float_model_prepared)

f=print_size_of_model(float_model,"fp32")
q=print_size_of_model(model_int8,"int8")
print("{0:.2f} times smaller".format(f/q))

  reduce_range will be deprecated in a future release of PyTorch."
  Returning default scale and zero point "


model:  fp32  	 Size (KB): 178814.045
model:  int8  	 Size (KB): 46699.104
3.83 times smaller


In [32]:
# Inspect the converted model to see which layers became quantized modules.
print(model_int8)

ResNet(
  (conv1): QuantizedConvReLU2d(3, 64, kernel_size=(7, 7), stride=(2, 2), scale=1.0, zero_point=0, padding=(3, 3))
  (bn1): Identity()
  (relu): Identity()
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): QuantizedConv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), scale=1.0, zero_point=0)
      (bn1): QuantizedBatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): QuantizedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=1.0, zero_point=0, padding=(1, 1))
      (bn2): QuantizedBatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): QuantizedConv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), scale=1.0, zero_point=0)
      (bn3): QuantizedBatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Quant