In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
transform = transforms.Compose([
    transforms.Resize((32,32)),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2471, 0.2435, 0.2616))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)


Files already downloaded and verified
Files already downloaded and verified


In [3]:
from mobilenetv2 import mobilenet_v2
# from resnet import resnet18
# model = resnet18(pretrained=True)
model = mobilenet_v2(pretrained=True)

In [4]:
import time
import numpy as np

def evaluation(model, dataloader):
    model.eval()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    correct = 0
    total = 0
    results = []
    with torch.no_grad():
        for inputs, labels in dataloader:

            inputs, labels = inputs.to(device), labels.to(device)
            
            start = time.time()
            outputs = model(inputs)
            
            torch.cuda.synchronize()
            end = time.time()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            results.append(end-start)

    infer_time = np.mean(results)
    print(infer_time)
    print(f"Accuracy on test set: {100 * correct / total:.2f}%")

In [5]:
evaluation(model, testloader)

0.013822449913507775
Accuracy on test set: 93.91%


## In this step, AIMET inserts fake quantization ops in the model graph and configures them.

Key parameters:

- Setting **default_output_bw** to 8 performs all activation quantizations in the model using integer 8-bit precision
- Setting **default_param_bw** to 8 performs all parameter quantizations in the model using integer 8-bit precision

See [QuantizationSimModel in the AIMET API documentation](https://quic.github.io/aimet-pages/AimetDocs/api_docs/torch_quantsim.html#aimet_torch.quantsim.QuantizationSimModel.compute_encodings) for a full explanation of the parameters.

In [6]:
from aimet_common.defs import QuantScheme
from aimet_torch.v1.quantsim import QuantizationSimModel

dummy_input = torch.rand(1, 3, 32, 32) 

dummy_input = dummy_input.cuda()

sim = QuantizationSimModel(model=model,
                           quant_scheme=QuantScheme.post_training_tf_enhanced,
                           dummy_input=dummy_input,
                           default_output_bw=8,
                           default_param_bw=4)

2024-12-26 04:07:09,724 - root - INFO - AIMET
2024-12-26 04:07:14,332 - Quant - INFO - No config file provided, defaulting to config file at /usr/local/lib/python3.10/dist-packages/aimet_common/quantsim_config/default_config.json
2024-12-26 04:07:14,352 - Quant - INFO - Unsupported op type Squeeze
2024-12-26 04:07:14,353 - Quant - INFO - Unsupported op type Mean
2024-12-26 04:07:14,356 - Quant - INFO - Selecting DefaultOpInstanceConfigGenerator to compute the specialized config. hw_version:default


In [7]:
print(sim)

-------------------------
Quantized Model Report
-------------------------
----------------------------------------------------------
Layer: features.0.0
  Input[0]: bw=8, encoding-present=False
  -------
  Param[weight]: bw=4, encoding-present=False
  -------
  Output[0]: Not quantized
  -------
----------------------------------------------------------
Layer: features.0.1
  Input[0]: Not quantized
  -------
  Param[weight]: Not quantized
  -------
  Param[bias]: Not quantized
  -------
  Output[0]: Not quantized
  -------
----------------------------------------------------------
Layer: features.0.2
  Input[0]: Not quantized
  -------
  Output[0]: bw=8, encoding-present=False
  -------
----------------------------------------------------------
Layer: features.1.conv.0.0
  Input[0]: Not quantized
  -------
  Param[weight]: bw=4, encoding-present=False
  -------
  Output[0]: Not quantized
  -------
----------------------------------------------------------
Layer: features.1.conv.0.1
  

In [8]:
def pass_calibration_data(sim_model, data_loader):
    batch_size = data_loader.batch_size

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    sim_model.eval()
    samples = 1000

    batch_cntr = 0
    with torch.no_grad():
        for input_data, target_data in data_loader:

            inputs_batch = input_data.to(device)
            sim_model(inputs_batch)

            batch_cntr += 1
            if (batch_cntr * batch_size) > samples:
                break

AIMET has added quantizer nodes to the model graph, but before the sim model can be used for inference or training, scale and offset quantization parameters must be calculated for each quantizer node by passing unlabeled data samples through the model to collect range statistics. This process is sometimes referred to as calibration. AIMET refers to it as "computing encodings".

In [10]:
sim.compute_encodings(forward_pass_callback=pass_calibration_data,
                      forward_pass_callback_args=trainloader)

In [11]:
evaluation(sim.model, testloader)

0.021739624723603454
Accuracy on test set: 90.00%


In [12]:
from aimet_torch.v1.adaround.adaround_weight import Adaround, AdaroundParameters
import os

data_loader = testloader
params = AdaroundParameters(data_loader=data_loader, num_batches=1, default_num_iterations=64)

dummy_input = torch.rand(1, 3, 32, 32)
dummy_input = dummy_input.cuda()

os.makedirs('./output/', exist_ok=True)
ada_model = Adaround.apply_adaround(model, dummy_input, params,
                                    path="output", 
                                    filename_prefix='adaround', 
                                    default_param_bw=4,
                                    default_quant_scheme=QuantScheme.post_training_tf_enhanced)

2024-12-26 04:07:52,681 - Quant - INFO - No config file provided, defaulting to config file at /usr/local/lib/python3.10/dist-packages/aimet_common/quantsim_config/default_config.json
2024-12-26 04:07:52,700 - Quant - INFO - Unsupported op type Squeeze
2024-12-26 04:07:52,701 - Quant - INFO - Unsupported op type Mean
2024-12-26 04:07:52,704 - Quant - INFO - Selecting DefaultOpInstanceConfigGenerator to compute the specialized config. hw_version:default
2024-12-26 04:07:53,347 - Utils - INFO - Caching 1 batches from data loader at path location: /tmp/tmpg5elrk10


                                       

2024-12-26 04:07:53,396 - Quant - INFO - Started Optimizing weight rounding of module: features.0.0


                                               

2024-12-26 04:07:53,941 - Quant - INFO - Started Optimizing weight rounding of module: features.1.conv.0.0


                                               

2024-12-26 04:07:54,121 - Quant - INFO - Started Optimizing weight rounding of module: features.1.conv.1


                                               

2024-12-26 04:07:54,305 - Quant - INFO - Started Optimizing weight rounding of module: features.2.conv.0.0


                                               

2024-12-26 04:07:54,526 - Quant - INFO - Started Optimizing weight rounding of module: features.2.conv.1.0


                                                

2024-12-26 04:07:54,764 - Quant - INFO - Started Optimizing weight rounding of module: features.2.conv.2


                                                

2024-12-26 04:07:54,953 - Quant - INFO - Started Optimizing weight rounding of module: features.3.conv.0.0


                                                

2024-12-26 04:07:55,164 - Quant - INFO - Started Optimizing weight rounding of module: features.3.conv.1.0


                                                

2024-12-26 04:07:55,443 - Quant - INFO - Started Optimizing weight rounding of module: features.3.conv.2


                                                

2024-12-26 04:07:55,673 - Quant - INFO - Started Optimizing weight rounding of module: features.4.conv.0.0


                                                

2024-12-26 04:07:55,894 - Quant - INFO - Started Optimizing weight rounding of module: features.4.conv.1.0


                                                

2024-12-26 04:07:56,123 - Quant - INFO - Started Optimizing weight rounding of module: features.4.conv.2


                                                

2024-12-26 04:07:56,292 - Quant - INFO - Started Optimizing weight rounding of module: features.5.conv.0.0


                                                

2024-12-26 04:07:56,464 - Quant - INFO - Started Optimizing weight rounding of module: features.5.conv.1.0


                                                

2024-12-26 04:07:56,646 - Quant - INFO - Started Optimizing weight rounding of module: features.5.conv.2


                                                

2024-12-26 04:07:56,836 - Quant - INFO - Started Optimizing weight rounding of module: features.6.conv.0.0


                                                

2024-12-26 04:07:57,004 - Quant - INFO - Started Optimizing weight rounding of module: features.6.conv.1.0


                                                

2024-12-26 04:07:57,189 - Quant - INFO - Started Optimizing weight rounding of module: features.6.conv.2


                                                

2024-12-26 04:07:57,379 - Quant - INFO - Started Optimizing weight rounding of module: features.7.conv.0.0


                                                

2024-12-26 04:07:57,556 - Quant - INFO - Started Optimizing weight rounding of module: features.7.conv.1.0


                                                

2024-12-26 04:07:57,732 - Quant - INFO - Started Optimizing weight rounding of module: features.7.conv.2


                                                

2024-12-26 04:07:57,903 - Quant - INFO - Started Optimizing weight rounding of module: features.8.conv.0.0


                                                

2024-12-26 04:07:58,080 - Quant - INFO - Started Optimizing weight rounding of module: features.8.conv.1.0


                                                

2024-12-26 04:07:58,254 - Quant - INFO - Started Optimizing weight rounding of module: features.8.conv.2


                                                

2024-12-26 04:07:58,438 - Quant - INFO - Started Optimizing weight rounding of module: features.9.conv.0.0


                                                

2024-12-26 04:07:58,615 - Quant - INFO - Started Optimizing weight rounding of module: features.9.conv.1.0


                                                

2024-12-26 04:07:58,793 - Quant - INFO - Started Optimizing weight rounding of module: features.9.conv.2


                                                

2024-12-26 04:07:58,975 - Quant - INFO - Started Optimizing weight rounding of module: features.10.conv.0.0


                                                

2024-12-26 04:07:59,157 - Quant - INFO - Started Optimizing weight rounding of module: features.10.conv.1.0


                                                

2024-12-26 04:07:59,341 - Quant - INFO - Started Optimizing weight rounding of module: features.10.conv.2


                                                

2024-12-26 04:07:59,529 - Quant - INFO - Started Optimizing weight rounding of module: features.11.conv.0.0


                                                

2024-12-26 04:07:59,715 - Quant - INFO - Started Optimizing weight rounding of module: features.11.conv.1.0


                                                

2024-12-26 04:07:59,903 - Quant - INFO - Started Optimizing weight rounding of module: features.11.conv.2


                                                

2024-12-26 04:08:00,099 - Quant - INFO - Started Optimizing weight rounding of module: features.12.conv.0.0


                                                

2024-12-26 04:08:00,297 - Quant - INFO - Started Optimizing weight rounding of module: features.12.conv.1.0


                                                

2024-12-26 04:08:00,503 - Quant - INFO - Started Optimizing weight rounding of module: features.12.conv.2


                                                

2024-12-26 04:08:00,721 - Quant - INFO - Started Optimizing weight rounding of module: features.13.conv.0.0


                                                

2024-12-26 04:08:00,924 - Quant - INFO - Started Optimizing weight rounding of module: features.13.conv.1.0


                                                 

2024-12-26 04:08:01,140 - Quant - INFO - Started Optimizing weight rounding of module: features.13.conv.2


                                                 

2024-12-26 04:08:01,359 - Quant - INFO - Started Optimizing weight rounding of module: features.14.conv.0.0


                                                 

2024-12-26 04:08:01,572 - Quant - INFO - Started Optimizing weight rounding of module: features.14.conv.1.0


                                                 

2024-12-26 04:08:01,778 - Quant - INFO - Started Optimizing weight rounding of module: features.14.conv.2


                                                 

2024-12-26 04:08:01,987 - Quant - INFO - Started Optimizing weight rounding of module: features.15.conv.0.0


                                                 

2024-12-26 04:08:02,200 - Quant - INFO - Started Optimizing weight rounding of module: features.15.conv.1.0


                                                 

2024-12-26 04:08:02,404 - Quant - INFO - Started Optimizing weight rounding of module: features.15.conv.2


                                                 

2024-12-26 04:08:02,623 - Quant - INFO - Started Optimizing weight rounding of module: features.16.conv.0.0


                                                 

2024-12-26 04:08:02,842 - Quant - INFO - Started Optimizing weight rounding of module: features.16.conv.1.0


                                                 

2024-12-26 04:08:03,061 - Quant - INFO - Started Optimizing weight rounding of module: features.16.conv.2


                                                 

2024-12-26 04:08:03,285 - Quant - INFO - Started Optimizing weight rounding of module: features.17.conv.0.0


                                                 

2024-12-26 04:08:03,512 - Quant - INFO - Started Optimizing weight rounding of module: features.17.conv.1.0


                                                 

2024-12-26 04:08:03,734 - Quant - INFO - Started Optimizing weight rounding of module: features.17.conv.2


                                                 

2024-12-26 04:08:04,129 - Quant - INFO - Started Optimizing weight rounding of module: features.18.0


                                                 

2024-12-26 04:08:04,367 - Quant - INFO - Started Optimizing weight rounding of module: classifier.1


100%|██████████| 141/141 [00:11<00:00, 12.55it/s]

2024-12-26 04:08:04,597 - Quant - INFO - Completed Adarounding Model





In [13]:
sim = QuantizationSimModel(model=ada_model,
                           dummy_input=dummy_input,
                           quant_scheme=QuantScheme.post_training_tf_enhanced,
                           default_output_bw=8, 
                           default_param_bw=4)

sim.set_and_freeze_param_encodings(encoding_path=os.path.join("output", 'adaround.encodings'))

sim.compute_encodings(forward_pass_callback=pass_calibration_data,
                      forward_pass_callback_args=trainloader)

2024-12-26 04:08:05,926 - Quant - INFO - No config file provided, defaulting to config file at /usr/local/lib/python3.10/dist-packages/aimet_common/quantsim_config/default_config.json
2024-12-26 04:08:05,944 - Quant - INFO - Unsupported op type Squeeze
2024-12-26 04:08:05,945 - Quant - INFO - Unsupported op type Mean
2024-12-26 04:08:05,949 - Quant - INFO - Selecting DefaultOpInstanceConfigGenerator to compute the specialized config. hw_version:default


  sim.set_and_freeze_param_encodings(encoding_path=os.path.join("output", 'adaround.encodings'))


In [14]:
evaluation(sim.model, testloader)

0.02189430707617651
Accuracy on test set: 91.85%
