# Quantization Aware Training Sample Code

In [1]:
import os
import random

import torch
import torch.nn as nn
import torchvision

import time
import copy
import numpy as np
from torchvision import transforms
from tqdm import tqdm
from torchsummary import summary
torch.backends.cudnn.benchmark = True

def set_random_seeds(random_seed=0):

    torch.manual_seed(random_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(random_seed)
    random.seed(random_seed)

def memory_check():
    print(f"  Allocated: {round(torch.cuda.memory_allocated()/1024**3,2)} GB")
    print(f"  Cached:    {round(torch.cuda.memory_reserved()/1024**3,2)} GB\n")

print(f"torch = {torch.__version__}")
print(f"torchvision = {torchvision.__version__}")


torch = 1.12.1
torchvision = 0.13.1


## Make ImageNet(validation 6G) Data Loader 

In [2]:
import wget
def ImageNet_DataLoader(split_num = [0.08,0.02,0.9]):
    if not os.path.exists("./data/ImageNet/meta.bin"):
        print("Meta data download")
        wget.download(url="https://image-net.org/data/ILSVRC/2012/ILSVRC2012_devkit_t12.tar.gz", out="./data/ImageNet")
    # if not os.path.exists("./data/ImageNet/ILSVRC2012_devkit_t3.tar.gz"):
    #     print("Toolkit t3 Download")
    #     toolkit_url = "https://image-net.org/data/ILSVRC/2012/ILSVRC2012_devkit_t3.tar.gz"
    #     wget.download(url= toolkit_url,out="./data/ImageNet")
    if not os.path.exists("./data/ImageNet/ILSVRC2012_img_val.tar"):
        print("Download val data")
        val_url  = 'https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar'
        wget.download(url=val_url, out="./data/ImageNet")

    # if not os.path.exists("./data/ImageNet/ILSVRC2012_img_train_t3.tar"):
    #     print("Download train t3 data")
    #     train_url = "https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_train_t3.tar"
    #     wget.download(url=train_url,out="./data/ImageNet")
    train_transform = transforms.Compose([
        transforms.Resize((256,256)),
        transforms.RandomCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    ])

    test_transform = transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    ])
    print(os.getcwd())
    dataset = torchvision.datasets.ImageNet(root="./data/ImageNet",split="val", transform = train_transform)
    Train_dataset, Test_dataset,_ = torch.utils.data.random_split(dataset, split_num)
    print(f"Train data set = {len(Train_dataset)}, Test = {len(Test_dataset)}")
    
    train_sampler = torch.utils.data.RandomSampler(Train_dataset)
    test_sampler = torch.utils.data.SequentialSampler(Test_dataset)

    Train_loader = torch.utils.data.DataLoader(dataset=Train_dataset, batch_size= 32, sampler = train_sampler)
    Test_loader = torch.utils.data.DataLoader(dataset=Test_dataset, batch_size =32, sampler = test_sampler)
    return Train_loader, Test_loader

In [3]:
def Cifar10_Dataloader():
    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding = 4),
        transforms.RandomHorizontalFlip(0.5),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    test_transform = transforms.Compose([
        transforms.Resize((32,32)),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])


    train_dataset = torchvision.datasets.CIFAR10(root="data", train=True, download=True, transform=train_transform) 
    # We will use test set for validation and test in this project.
    # Do not use test set for validation in practice!
    test_dataset = torchvision.datasets.CIFAR10(root="data", train=False, download=True, transform=test_transform)
    print(f"Train data set = {len(train_dataset)}, Test = {len(test_dataset)}")

    train_sampler = torch.utils.data.RandomSampler(train_dataset)
    test_sampler = torch.utils.data.SequentialSampler(test_dataset)

    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset, batch_size=128,
        sampler=train_sampler)

    test_loader = torch.utils.data.DataLoader(
        dataset=test_dataset, batch_size=128,
        sampler=test_sampler)
    return train_loader, test_loader


## MobileNetV2

# Train and Evaluate Fuc

In [4]:
def Evaluating(model, test_loader, device, criterion=None):
    model.to(device)
    model.eval()

    running_loss = 0
    running_corrects = 0

    for inputs, labels in tqdm(iter(test_loader)):
        
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)

        _, preds = torch.max(outputs, 1)
        if criterion is not None:
            loss = criterion(outputs, labels).item()
        else:
            loss = 0
        # statistics
        running_loss += loss * labels.size(0)
        running_corrects += (preds == labels).sum().item()

    eval_loss = running_loss / len(test_loader.dataset)
    eval_accuracy = 100 * running_corrects / len(test_loader.dataset)

    return eval_loss, eval_accuracy

In [5]:
def Training(model, train_loader, test_loader, device, optimizer, scheduler, epochs=100,model_name="test"):
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    print("Before Training")
    torch.cuda.memory_reserved()
    memory_check()
    count = 0
    best_loss = np.Inf
    # Training
    model.to(device)
    for epoch in range(epochs):

        running_loss = 0
        running_corrects = 0
        model.train()

        for inputs, labels in tqdm(iter(train_loader)):

            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)

            loss = criterion(outputs, labels)
 
            loss.backward()
            optimizer.step()
            
            _, preds = torch.max(outputs, 1)
            # statistics
            running_loss += loss.item() * labels.size(0)
            running_corrects += (preds == labels).sum().item()

        # Set learning rate scheduler
        if scheduler is not None:
            scheduler.step()
        train_loss = running_loss / len(train_loader.dataset)
        train_accuracy = 100 * running_corrects / len(train_loader.dataset) 

        # Evaluation
        val_loss, val_acc = Evaluating(model,test_loader,device=device,criterion=criterion)
        print(f"--------{epoch+1}----------")
        print(f"Train {train_loss:.4f} Loss, {train_accuracy:.2f} Acc")
        print(f"Validation {val_loss:.4f} Loss, {val_acc:.2f} Acc")

        if best_loss > val_loss:
            best_loss = val_loss
            count = 0
            torch.save(model.state_dict(), f"./models/{model_name}.pt")
        else:
            count +=1
            if count > 10:
                break
    model.load_state_dict(torch.load(f"./models/{model_name}.pt")) 
    return model

## Layer fusion Check
conv, bn, relu를 하나의 layer로 만들어 각각의 layer를 읽어오는 연산을 줄이는 과정   
folding과는 다른 경량화 기법   
Fusion 된 layer는 identity로 바뀜

In [6]:
def model_eq_check(model1, model2, device, rtol=1e-03, atol=1e-06, num_tests=100, input_size=(1,3,32,32)):

    model1.to(device)
    model2.to(device)

    for _ in range(num_tests):
        x = torch.rand(size=input_size).to(device)
        y1 = model1(x).detach().cpu().numpy()
        y2 = model2(x).detach().cpu().numpy()
        # 배열이 허용 오차범위 abs(a - b) <= (atol + rtol * absolute(b)) 이내면 True
        if np.allclose(a=y1, b=y2, rtol=rtol, atol=atol, equal_nan=False) == False:
            print("Model equivalence test fail")
            return False
    print("Two models equal")
    return True

In [7]:
def time_test(model, device, input_size = (1,3,256,256),num_tests=100,):
    model.to(device)
    model.eval()

    x = torch.rand(size=input_size).to(device)

    with torch.no_grad():
        for _ in range(10):
            _ = model(x)
    torch.cuda.synchronize()

    with torch.no_grad():
        start_time = time.time()

        for _ in range(num_tests):
            _ = model(x)
            torch.cuda.synchronize()
        total_time = time.time() - start_time

    aver_time = total_time / num_tests
    return total_time, aver_time

In [8]:
class ConvBnReLUModel(nn.Module):
    def __init__(self):
        super(ConvBnReLUModel,self).__init__()
        self.conv = nn.Conv2d(3,5,3,bias=True).to(dtype=torch.float)
        self.bn = nn.BatchNorm2d(5).to(dtype=torch.float)
        self.relu = torch.nn.ReLU(inplace=True)
        self.quant = torch.quantization.QuantStub()
        self.dequant = torch.quantization.DeQuantStub()
    
    def forward(self,x):
        x = self.quant(x)
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.dequant(x)
        return x
    
model = ConvBnReLUModel().to(device=torch.device("cpu:0"))
model.eval()
print(model)
# for p in model.named_parameters():
#     print(p)
#     print()
# "fbgemm" for server , "qnnpack" for mobile 
# model.qconfig = torch.quantization.get_default_qconfig('fbgemm')

# # torch.quantization.fuse_module or myModel.fuse_model()
fuse_model = torch.ao.quantization.fuse_modules(model,[['conv','bn','relu']], inplace=False)
# fuse_model = model.fuse_model()
print(fuse_model)

print(f"-- Equal Test --")
model_eq_check(model, fuse_model, device=torch.device("cpu:0"))


print(f"-- Infer Time Test --")
ori_cpu_time,_ = time_test(model,torch.device("cpu"))
fus_cpu_time,_ = time_test(fuse_model,torch.device("cpu"))

print(f"origin model infer time {ori_cpu_time:.3f}s")
print(f"fusion model infer time {fus_cpu_time:.3f}s")
del model
del fuse_model



ConvBnReLUModel(
  (conv): Conv2d(3, 5, kernel_size=(3, 3), stride=(1, 1))
  (bn): BatchNorm2d(5, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (quant): QuantStub()
  (dequant): DeQuantStub()
)
ConvBnReLUModel(
  (conv): ConvReLU2d(
    (0): Conv2d(3, 5, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU(inplace=True)
  )
  (bn): Identity()
  (relu): Identity()
  (quant): QuantStub()
  (dequant): DeQuantStub()
)
-- Equal Test --
Two models equal
-- Infer Time Test --
origin model infer time 0.055s
fusion model infer time 0.041s


# MAIN

In [9]:
# gpu,cpu device 선언
if torch.cuda.is_available():
    gpu_device = torch.device("cuda")
cpu_device = torch.device("cpu")

set_random_seeds(42)

# model 가져오기
from models import mobilenet_v2, MobileNet_V2_Weights,quat_mobilenet_v2
model = quat_mobilenet_v2(weights=MobileNet_V2_Weights.IMAGENET1K_V1,activation_layer=nn.ReLU)
model.classifier.append(nn.Dropout(0.2))
model.classifier.append(nn.Linear(1000, 10))

from torchsummary import summary
summary(model,(3,32,32), device='cpu') 

# Move the model to CPU since static quantization does not support CUDA currently.
# ImageNet Data 
Train_loader, Test_loader = Cifar10_Dataloader()

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         QuantStub-1            [-1, 3, 32, 32]               0
            Conv2d-2           [-1, 32, 16, 16]             864
       BatchNorm2d-3           [-1, 32, 16, 16]              64
              ReLU-4           [-1, 32, 16, 16]               0
            Conv2d-5           [-1, 32, 16, 16]             288
       BatchNorm2d-6           [-1, 32, 16, 16]              64
              ReLU-7           [-1, 32, 16, 16]               0
            Conv2d-8           [-1, 16, 16, 16]             512
       BatchNorm2d-9           [-1, 16, 16, 16]              32
QuantizableInvertedResidual-10           [-1, 16, 16, 16]               0
           Conv2d-11           [-1, 96, 16, 16]           1,536
      BatchNorm2d-12           [-1, 96, 16, 16]             192
             ReLU-13           [-1, 96, 16, 16]               0
           Conv2d-14         

In [10]:
for i,data in enumerate(Train_loader):

    img = data[0].to(gpu_device)
    label = data[1].to(gpu_device)
    print(f"{i} data size = {img.size()}, label size = {label.size()}")
    memory_check()
    if i > 20:
        break

0 data size = torch.Size([128, 3, 32, 32]), label size = torch.Size([128])
  Allocated: 0.0 GB
  Cached:    0.02 GB

1 data size = torch.Size([128, 3, 32, 32]), label size = torch.Size([128])
  Allocated: 0.0 GB
  Cached:    0.02 GB

2 data size = torch.Size([128, 3, 32, 32]), label size = torch.Size([128])
  Allocated: 0.0 GB
  Cached:    0.02 GB

3 data size = torch.Size([128, 3, 32, 32]), label size = torch.Size([128])
  Allocated: 0.0 GB
  Cached:    0.02 GB

4 data size = torch.Size([128, 3, 32, 32]), label size = torch.Size([128])
  Allocated: 0.0 GB
  Cached:    0.02 GB

5 data size = torch.Size([128, 3, 32, 32]), label size = torch.Size([128])
  Allocated: 0.0 GB
  Cached:    0.02 GB

6 data size = torch.Size([128, 3, 32, 32]), label size = torch.Size([128])
  Allocated: 0.0 GB
  Cached:    0.02 GB

7 data size = torch.Size([128, 3, 32, 32]), label size = torch.Size([128])
  Allocated: 0.0 GB
  Cached:    0.02 GB

8 data size = torch.Size([128, 3, 32, 32]), label size = torch.S

In [11]:
need_train=False
if need_train:
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9, weight_decay=5e-4)
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30,60,90], gamma=0.5)

    model = Training(model,train_loader=Train_loader,test_loader=Test_loader,device=gpu_device,optimizer=optimizer,scheduler=scheduler,epochs=20,
    model_name = "q_mobilenetv2_cifar10")
else:
    model.load_state_dict(torch.load("./models/q_mobilenetv2_cifar10.pt"))
    _,pre_acc = Evaluating(model,Test_loader,cpu_device)
    print(f"pretrained model acc : {pre_acc:.2f} %")
    # QAT가 적용된 floating point 모델을 quantized int model로 변환
model.to(cpu_device)

100%|██████████| 79/79 [00:05<00:00, 15.53it/s]

pretrained model acc : 90.06 %





QuantizableMobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (1): QuantizableInvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU()
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (skip_add): FloatFunctional(
        (activation_post_process): Identity()
      )
    )
    (2): QuantizableInvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): C

In [12]:

# 모델을 CPU상태로 두고 eval로 layer fusion
model.eval()
print(model)
# Layer fusion
# fused_model = torch.quantization.fuse_modules(model,[["conv1","bn1","relu"]])

# for module_name, module in fused_model.named_children():
#     if "layer" in module_name:
#         # basic_block 의 conv1, bn1, relu, conv2, bn2 를 fusion
#         for basic_block_name, basic_block in module.named_children():
#             torch.ao.quantization.fuse_modules(basic_block,[["conv1","bn1","relu"],["conv2","bn2"]],inplace=True)
#             # basic_block안의 downsampling block의 Conv2d Batchnorm2D fusion
#             for sub_block_name, sub_block in basic_block.named_children():
#                 if sub_block_name == "downsample":
#                     torch.ao.quantization.fuse_modules(sub_block,[["0","1"]], inplace=True)
# print(fused_model)
fused_model = copy.deepcopy(model)
fused_model.fuse_model()
# Equal Test
print(f"Equal Test between origin and fused")
print(model_eq_check(model,fused_model,device=cpu_device))

QuantizableMobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (1): QuantizableInvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU()
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (skip_add): FloatFunctional(
        (activation_post_process): Identity()
      )
    )
    (2): QuantizableInvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): C

In [7]:
def calibrate_model(model, loader, device=torch.device("cpu")):
    print("calibrating ...")
    model.to(device)
    model.eval()

    for inputs, labels in tqdm(loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        _ = model(inputs)

In [26]:
from models.mobilenetv2 import quat_mobilenet_v2
new = False
if new:
    quat_model = quat_mobilenet_v2(weights=MobileNet_V2_Weights.IMAGENET1K_V1,activation_layer=nn.ReLU)
    print(quat_model)
    quat_model.classifier.append(nn.Dropout(0.2))
    quat_model.classifier.append(nn.Linear(1000, 10))
    quat_model.fuse_model()
else:
    pre_model = copy.deepcopy(fused_model)
    _,acc = Evaluating(pre_model,Test_loader,cpu_device)
    print(f"Before quantization acc : {acc:.2f} %")
    pre_model.eval()
    pre_model.qconfig = torch.quantization.get_default_qconfig("fbgemm")
    torch.quantization.prepare(pre_model,inplace=True)
    calibrate_model(pre_model, Test_loader)
    pre_model = torch.quantization.convert(pre_model,inplace=True)
    torch.jit.save(torch.jit.script(pre_model),"./models/Q_mobilenetv2_cifar10_jit.pt")
    pre_model = torch.jit.load("./models/Q_mobilenetv2_cifar10_jit.pt")
    _,int8_acc = Evaluating(pre_model,Test_loader,cpu_device)
    print(f"post int8_model acc :{int8_acc:.2f} %")
    
    quat_model = fused_model
# qconfig("fbgemm") 은 server 용 "qnnpack"은 mobile용 ["fbgemm", "x86", "qnnpack", "onednn"]

# QAT를 하기위해 quantization 모델 준비
quat_model.train()
quat_model.qconfig = torch.quantization.get_default_qconfig("fbgemm")
print(quat_model.qconfig)
quat_model = torch.quantization.prepare_qat(quat_model)
print(type(quat_model))

# print('Inverted Residual Block: After preparation for QAT, note fake-quantization modules \n',quat_model.features[1].conv)

100%|██████████| 79/79 [00:04<00:00, 18.92it/s]


Before quantization acc : 90.06 %
calibrating ...


100%|██████████| 79/79 [00:10<00:00,  7.83it/s]
100%|██████████| 79/79 [00:04<00:00, 16.17it/s]


post int8_model acc :85.65 %
<class 'models.mobilenetv2.QuantizableMobileNetV2'>


In [14]:
qat_need = True
if qat_need:
    optimizer = torch.optim.SGD(quat_model.parameters(), lr=1e-5, momentum=0.9, weight_decay=5e-4)
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30,60,90], gamma=0.5)

    first_loss, first_acc = Evaluating(model=quat_model,test_loader=Test_loader,device=cpu_device,criterion=nn.CrossEntropyLoss())
    print(f"Before Acc : {first_acc:.2f} acc")
    quat_model = Training(quat_model,train_loader=Train_loader,test_loader=Test_loader,
    device=gpu_device,optimizer=optimizer,scheduler=scheduler,epochs=30,model_name="QAT_mobilenetv2_cifar10")

100%|██████████| 79/79 [00:08<00:00,  9.06it/s]


Before Acc : 90.06 acc
Before Training
  Allocated: 0.0 GB
  Cached:    0.03 GB



100%|██████████| 391/391 [00:50<00:00,  7.70it/s]
100%|██████████| 79/79 [00:06<00:00, 11.54it/s]


--------0----------
Train 0.6539 Loss, 94.07 Acc
Validation 0.7404 Loss, 89.93 Acc


100%|██████████| 391/391 [00:49<00:00,  7.95it/s]
100%|██████████| 79/79 [00:06<00:00, 11.61it/s]


--------1----------
Train 0.6514 Loss, 94.11 Acc
Validation 0.7387 Loss, 90.03 Acc


100%|██████████| 391/391 [00:49<00:00,  7.94it/s]
100%|██████████| 79/79 [00:06<00:00, 11.58it/s]


--------2----------
Train 0.6492 Loss, 94.16 Acc
Validation 0.7379 Loss, 90.11 Acc


100%|██████████| 391/391 [00:49<00:00,  7.93it/s]
100%|██████████| 79/79 [00:06<00:00, 11.54it/s]


--------3----------
Train 0.6496 Loss, 94.15 Acc
Validation 0.7385 Loss, 90.11 Acc


100%|██████████| 391/391 [00:49<00:00,  7.90it/s]
100%|██████████| 79/79 [00:06<00:00, 11.72it/s]


--------4----------
Train 0.6481 Loss, 94.33 Acc
Validation 0.7382 Loss, 90.16 Acc


100%|██████████| 391/391 [00:49<00:00,  7.97it/s]
100%|██████████| 79/79 [00:06<00:00, 11.74it/s]


--------5----------
Train 0.6473 Loss, 94.32 Acc
Validation 0.7375 Loss, 90.19 Acc


100%|██████████| 391/391 [00:48<00:00,  8.02it/s]
100%|██████████| 79/79 [00:06<00:00, 11.69it/s]


--------6----------
Train 0.6467 Loss, 94.40 Acc
Validation 0.7382 Loss, 90.13 Acc


100%|██████████| 391/391 [00:48<00:00,  8.01it/s]
100%|██████████| 79/79 [00:06<00:00, 11.65it/s]


--------7----------
Train 0.6456 Loss, 94.42 Acc
Validation 0.7370 Loss, 90.19 Acc


100%|██████████| 391/391 [00:49<00:00,  7.97it/s]
100%|██████████| 79/79 [00:06<00:00, 11.59it/s]


--------8----------
Train 0.6463 Loss, 94.31 Acc
Validation 0.7370 Loss, 90.14 Acc


100%|██████████| 391/391 [00:48<00:00,  8.01it/s]
100%|██████████| 79/79 [00:06<00:00, 11.75it/s]


--------9----------
Train 0.6459 Loss, 94.42 Acc
Validation 0.7378 Loss, 90.08 Acc


100%|██████████| 391/391 [00:48<00:00,  7.99it/s]
100%|██████████| 79/79 [00:06<00:00, 11.70it/s]


--------10----------
Train 0.6445 Loss, 94.48 Acc
Validation 0.7379 Loss, 90.25 Acc


100%|██████████| 391/391 [00:49<00:00,  7.95it/s]
100%|██████████| 79/79 [00:06<00:00, 11.62it/s]


--------11----------
Train 0.6450 Loss, 94.32 Acc
Validation 0.7373 Loss, 90.16 Acc


100%|██████████| 391/391 [00:49<00:00,  7.92it/s]
100%|██████████| 79/79 [00:06<00:00, 11.50it/s]


--------12----------
Train 0.6437 Loss, 94.40 Acc
Validation 0.7357 Loss, 90.30 Acc


100%|██████████| 391/391 [00:49<00:00,  7.97it/s]
100%|██████████| 79/79 [00:06<00:00, 11.54it/s]


--------13----------
Train 0.6420 Loss, 94.55 Acc
Validation 0.7363 Loss, 90.28 Acc


100%|██████████| 391/391 [00:49<00:00,  7.97it/s]
100%|██████████| 79/79 [00:06<00:00, 11.56it/s]


--------14----------
Train 0.6418 Loss, 94.53 Acc
Validation 0.7369 Loss, 90.30 Acc


100%|██████████| 391/391 [00:49<00:00,  7.85it/s]
100%|██████████| 79/79 [00:06<00:00, 11.58it/s]


--------15----------
Train 0.6427 Loss, 94.39 Acc
Validation 0.7365 Loss, 90.33 Acc


100%|██████████| 391/391 [00:49<00:00,  7.98it/s]
100%|██████████| 79/79 [00:06<00:00, 11.52it/s]


--------16----------
Train 0.6423 Loss, 94.50 Acc
Validation 0.7359 Loss, 90.25 Acc


100%|██████████| 391/391 [00:49<00:00,  7.89it/s]
100%|██████████| 79/79 [00:06<00:00, 11.69it/s]


--------17----------
Train 0.6424 Loss, 94.56 Acc
Validation 0.7355 Loss, 90.34 Acc


100%|██████████| 391/391 [00:48<00:00,  8.04it/s]
100%|██████████| 79/79 [00:06<00:00, 11.85it/s]


--------18----------
Train 0.6417 Loss, 94.48 Acc
Validation 0.7352 Loss, 90.25 Acc


100%|██████████| 391/391 [00:48<00:00,  8.00it/s]
100%|██████████| 79/79 [00:06<00:00, 11.80it/s]


--------19----------
Train 0.6415 Loss, 94.55 Acc
Validation 0.7367 Loss, 90.32 Acc


100%|██████████| 391/391 [00:48<00:00,  8.01it/s]
100%|██████████| 79/79 [00:06<00:00, 11.62it/s]


--------20----------
Train 0.6410 Loss, 94.57 Acc
Validation 0.7367 Loss, 90.27 Acc


100%|██████████| 391/391 [00:48<00:00,  7.99it/s]
100%|██████████| 79/79 [00:06<00:00, 11.46it/s]


--------21----------
Train 0.6407 Loss, 94.46 Acc
Validation 0.7366 Loss, 90.16 Acc


100%|██████████| 391/391 [00:49<00:00,  7.93it/s]
100%|██████████| 79/79 [00:06<00:00, 11.48it/s]


--------22----------
Train 0.6386 Loss, 94.70 Acc
Validation 0.7354 Loss, 90.23 Acc


100%|██████████| 391/391 [00:48<00:00,  8.04it/s]
100%|██████████| 79/79 [00:06<00:00, 11.60it/s]


--------23----------
Train 0.6408 Loss, 94.48 Acc
Validation 0.7362 Loss, 90.27 Acc


100%|██████████| 391/391 [00:49<00:00,  7.95it/s]
100%|██████████| 79/79 [00:06<00:00, 11.88it/s]


--------24----------
Train 0.6400 Loss, 94.66 Acc
Validation 0.7362 Loss, 90.22 Acc


100%|██████████| 391/391 [00:48<00:00,  8.03it/s]
100%|██████████| 79/79 [00:06<00:00, 11.42it/s]


--------25----------
Train 0.6406 Loss, 94.58 Acc
Validation 0.7354 Loss, 90.26 Acc


100%|██████████| 391/391 [00:48<00:00,  8.00it/s]
100%|██████████| 79/79 [00:06<00:00, 11.48it/s]


--------26----------
Train 0.6407 Loss, 94.47 Acc
Validation 0.7352 Loss, 90.30 Acc


100%|██████████| 391/391 [00:49<00:00,  7.98it/s]
100%|██████████| 79/79 [00:06<00:00, 11.71it/s]


--------27----------
Train 0.6408 Loss, 94.65 Acc
Validation 0.7350 Loss, 90.31 Acc


100%|██████████| 391/391 [00:49<00:00,  7.97it/s]
100%|██████████| 79/79 [00:06<00:00, 11.60it/s]


--------28----------
Train 0.6400 Loss, 94.66 Acc
Validation 0.7348 Loss, 90.28 Acc


100%|██████████| 391/391 [00:49<00:00,  7.91it/s]
100%|██████████| 79/79 [00:06<00:00, 11.76it/s]

--------29----------
Train 0.6367 Loss, 94.84 Acc
Validation 0.7349 Loss, 90.33 Acc





In [15]:
# QAT가 적용된 floating point 모델을 quantized int model로 변환
# quat_model.load_state_dict(torch.load("./models/QAT_mobilenetv2_cifar10.pt"))
quat_model.to('cpu')
int8_model = torch.ao.quantization.convert(quat_model)
int8_model.eval()
_,int8_acc = Evaluating(int8_model,Test_loader,cpu_device)
print(f"int8_model acc : {int8_acc:.2f} %")
torch.jit.save(torch.jit.script(int8_model),"./models/QAT_mobilenetv2_cifar10_jit.pt")
int8_model = torch.jit.load("./models/QAT_mobilenetv2_cifar10_jit.pt",map_location=cpu_device)

100%|██████████| 79/79 [00:03<00:00, 24.29it/s]


int8_model acc : 88.47 %


In [16]:
_,int8_acc = Evaluating(int8_model,Test_loader,cpu_device)
print(f"jit int8_model acc : {int8_acc:.2f} %")

100%|██████████| 79/79 [00:04<00:00, 15.96it/s]

jit int8_model acc : 88.47 %





In [27]:
pre_model = torch.jit.load("./models/Q_mobilenetv2_cifar10_jit.pt")
_,int8_acc = Evaluating(pre_model,Test_loader,cpu_device)
print(f"post int8_model acc :{int8_acc:.2f} %")

100%|██████████| 79/79 [00:04<00:00, 16.03it/s]

post int8_model acc :85.65 %





In [5]:
from models import quat_mobilenet_v2
from torchsummary import summary
inverted_residual_setting = [
                # t, c, n, s
                [1, 16, 1, 1],
                # [6, 24, 2, 1],  # NOTE: change stride 2 -> 1 for CIFAR10
                [4, 32, 3, 2],
                [4, 64, 4, 2],
                # [6, 96, 3, 1],
                [4, 128, 3, 2],
                # [6, 320, 1, 1],
            ]
tiny_mobilenet = quat_mobilenet_v2(cifar10=True)
summary(tiny_mobilenet,(3,32,32),device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         QuantStub-1            [-1, 3, 32, 32]               0
            Conv2d-2           [-1, 32, 16, 16]             864
       BatchNorm2d-3           [-1, 32, 16, 16]              64
              ReLU-4           [-1, 32, 16, 16]               0
            Conv2d-5           [-1, 32, 16, 16]             288
       BatchNorm2d-6           [-1, 32, 16, 16]              64
              ReLU-7           [-1, 32, 16, 16]               0
            Conv2d-8           [-1, 16, 16, 16]             512
       BatchNorm2d-9           [-1, 16, 16, 16]              32
QuantizableInvertedResidual-10           [-1, 16, 16, 16]               0
           Conv2d-11           [-1, 64, 16, 16]           1,024
      BatchNorm2d-12           [-1, 64, 16, 16]             128
             ReLU-13           [-1, 64, 16, 16]               0
           Conv2d-14         

In [9]:
optimizer = torch.optim.SGD(tiny_mobilenet.parameters(), lr=1e-2, momentum=0.9, weight_decay=5e-4)
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10,30,70], gamma=0.5)

tiny_mobilenet = Training(tiny_mobilenet,Train_loader,Test_loader,gpu_device,optimizer,scheduler,model_name="tiny")

Before Training
  Allocated: 0.0 GB
  Cached:    0.0 GB



100%|██████████| 391/391 [00:39<00:00,  9.83it/s]
100%|██████████| 79/79 [00:04<00:00, 18.31it/s]


--------1----------
Train 1.9100 Loss, 32.71 Acc
Validation 1.7708 Loss, 40.71 Acc


100%|██████████| 391/391 [00:36<00:00, 10.78it/s]
100%|██████████| 79/79 [00:04<00:00, 18.30it/s]


--------2----------
Train 1.6648 Loss, 45.58 Acc
Validation 1.5759 Loss, 50.83 Acc


100%|██████████| 391/391 [00:36<00:00, 10.79it/s]
100%|██████████| 79/79 [00:04<00:00, 18.31it/s]


--------3----------
Train 1.5527 Loss, 51.65 Acc
Validation 1.4685 Loss, 55.61 Acc


100%|██████████| 391/391 [00:36<00:00, 10.83it/s]
100%|██████████| 79/79 [00:04<00:00, 18.30it/s]


--------4----------
Train 1.4661 Loss, 55.93 Acc
Validation 1.4088 Loss, 58.74 Acc


100%|██████████| 391/391 [00:36<00:00, 10.84it/s]
100%|██████████| 79/79 [00:04<00:00, 18.41it/s]


--------5----------
Train 1.3946 Loss, 59.35 Acc
Validation 1.3212 Loss, 62.56 Acc


100%|██████████| 391/391 [00:36<00:00, 10.82it/s]
100%|██████████| 79/79 [00:04<00:00, 18.64it/s]


--------6----------
Train 1.3385 Loss, 62.42 Acc
Validation 1.2739 Loss, 64.98 Acc


100%|██████████| 391/391 [00:36<00:00, 10.81it/s]
100%|██████████| 79/79 [00:04<00:00, 18.29it/s]


--------7----------
Train 1.2960 Loss, 64.35 Acc
Validation 1.2375 Loss, 66.87 Acc


100%|██████████| 391/391 [00:35<00:00, 10.91it/s]
100%|██████████| 79/79 [00:04<00:00, 18.63it/s]


--------8----------
Train 1.2587 Loss, 65.83 Acc
Validation 1.2230 Loss, 68.05 Acc


100%|██████████| 391/391 [00:36<00:00, 10.79it/s]
100%|██████████| 79/79 [00:04<00:00, 18.08it/s]


--------9----------
Train 1.2233 Loss, 67.60 Acc
Validation 1.1985 Loss, 68.92 Acc


100%|██████████| 391/391 [00:36<00:00, 10.79it/s]
100%|██████████| 79/79 [00:04<00:00, 18.30it/s]


--------10----------
Train 1.1964 Loss, 68.64 Acc
Validation 1.1538 Loss, 71.13 Acc


100%|██████████| 391/391 [00:36<00:00, 10.77it/s]
100%|██████████| 79/79 [00:04<00:00, 18.32it/s]


--------11----------
Train 1.1320 Loss, 71.90 Acc
Validation 1.0916 Loss, 73.40 Acc


100%|██████████| 391/391 [00:32<00:00, 11.95it/s]
100%|██████████| 79/79 [00:04<00:00, 18.37it/s]


--------12----------
Train 1.1131 Loss, 72.54 Acc
Validation 1.0825 Loss, 73.94 Acc


100%|██████████| 391/391 [00:35<00:00, 10.94it/s]
100%|██████████| 79/79 [00:04<00:00, 18.74it/s]


--------13----------
Train 1.0969 Loss, 73.38 Acc
Validation 1.0678 Loss, 74.48 Acc


100%|██████████| 391/391 [00:35<00:00, 10.88it/s]
100%|██████████| 79/79 [00:04<00:00, 18.62it/s]


--------14----------
Train 1.0834 Loss, 74.13 Acc
Validation 1.0669 Loss, 75.01 Acc


100%|██████████| 391/391 [00:35<00:00, 10.94it/s]
100%|██████████| 79/79 [00:04<00:00, 18.64it/s]


--------15----------
Train 1.0716 Loss, 74.58 Acc
Validation 1.0620 Loss, 74.85 Acc


100%|██████████| 391/391 [00:36<00:00, 10.84it/s]
100%|██████████| 79/79 [00:04<00:00, 18.59it/s]


--------16----------
Train 1.0623 Loss, 74.87 Acc
Validation 1.0511 Loss, 75.26 Acc


100%|██████████| 391/391 [00:36<00:00, 10.75it/s]
100%|██████████| 79/79 [00:04<00:00, 18.74it/s]


--------17----------
Train 1.0474 Loss, 75.70 Acc
Validation 1.0421 Loss, 75.89 Acc


100%|██████████| 391/391 [00:36<00:00, 10.82it/s]
100%|██████████| 79/79 [00:04<00:00, 18.54it/s]


--------18----------
Train 1.0407 Loss, 76.17 Acc
Validation 1.0388 Loss, 76.11 Acc


100%|██████████| 391/391 [00:36<00:00, 10.83it/s]
100%|██████████| 79/79 [00:04<00:00, 18.27it/s]


--------19----------
Train 1.0307 Loss, 76.49 Acc
Validation 1.0274 Loss, 76.48 Acc


100%|██████████| 391/391 [00:36<00:00, 10.76it/s]
100%|██████████| 79/79 [00:04<00:00, 18.53it/s]


--------20----------
Train 1.0187 Loss, 76.98 Acc
Validation 1.0115 Loss, 77.58 Acc


100%|██████████| 391/391 [00:37<00:00, 10.54it/s]
100%|██████████| 79/79 [00:04<00:00, 18.17it/s]


--------21----------
Train 1.0130 Loss, 77.27 Acc
Validation 1.0090 Loss, 77.57 Acc


100%|██████████| 391/391 [00:36<00:00, 10.83it/s]
100%|██████████| 79/79 [00:04<00:00, 18.38it/s]


--------22----------
Train 1.0026 Loss, 77.80 Acc
Validation 1.0092 Loss, 77.56 Acc


100%|██████████| 391/391 [00:37<00:00, 10.57it/s]
100%|██████████| 79/79 [00:04<00:00, 18.11it/s]


--------23----------
Train 0.9945 Loss, 78.22 Acc
Validation 0.9930 Loss, 78.23 Acc


100%|██████████| 391/391 [00:36<00:00, 10.77it/s]
100%|██████████| 79/79 [00:04<00:00, 18.29it/s]


--------24----------
Train 0.9853 Loss, 78.58 Acc
Validation 1.0002 Loss, 77.97 Acc


100%|██████████| 391/391 [00:36<00:00, 10.85it/s]
100%|██████████| 79/79 [00:04<00:00, 18.43it/s]


--------25----------
Train 0.9791 Loss, 78.91 Acc
Validation 0.9812 Loss, 78.68 Acc


100%|██████████| 391/391 [00:37<00:00, 10.45it/s]
100%|██████████| 79/79 [00:04<00:00, 18.44it/s]


--------26----------
Train 0.9742 Loss, 79.15 Acc
Validation 0.9862 Loss, 78.55 Acc


100%|██████████| 391/391 [00:36<00:00, 10.81it/s]
100%|██████████| 79/79 [00:04<00:00, 18.39it/s]


--------27----------
Train 0.9624 Loss, 79.70 Acc
Validation 0.9706 Loss, 78.91 Acc


100%|██████████| 391/391 [00:35<00:00, 10.94it/s]
100%|██████████| 79/79 [00:04<00:00, 18.71it/s]


--------28----------
Train 0.9600 Loss, 79.85 Acc
Validation 0.9865 Loss, 78.21 Acc


100%|██████████| 391/391 [00:35<00:00, 10.95it/s]
100%|██████████| 79/79 [00:04<00:00, 18.70it/s]


--------29----------
Train 0.9512 Loss, 80.26 Acc
Validation 0.9725 Loss, 79.30 Acc


100%|██████████| 391/391 [00:35<00:00, 10.90it/s]
100%|██████████| 79/79 [00:04<00:00, 18.64it/s]


--------30----------
Train 0.9451 Loss, 80.40 Acc
Validation 0.9684 Loss, 79.34 Acc


100%|██████████| 391/391 [00:36<00:00, 10.79it/s]
100%|██████████| 79/79 [00:04<00:00, 18.74it/s]


--------31----------
Train 0.9203 Loss, 81.63 Acc
Validation 0.9420 Loss, 80.88 Acc


100%|██████████| 391/391 [00:35<00:00, 10.95it/s]
100%|██████████| 79/79 [00:04<00:00, 18.49it/s]


--------32----------
Train 0.9110 Loss, 81.85 Acc
Validation 0.9369 Loss, 81.04 Acc


100%|██████████| 391/391 [00:35<00:00, 10.96it/s]
100%|██████████| 79/79 [00:04<00:00, 18.81it/s]


--------33----------
Train 0.9058 Loss, 82.13 Acc
Validation 0.9488 Loss, 80.31 Acc


100%|██████████| 391/391 [00:35<00:00, 10.99it/s]
100%|██████████| 79/79 [00:04<00:00, 18.79it/s]


--------34----------
Train 0.8996 Loss, 82.72 Acc
Validation 0.9462 Loss, 80.93 Acc


100%|██████████| 391/391 [00:36<00:00, 10.84it/s]
100%|██████████| 79/79 [00:04<00:00, 18.26it/s]


--------35----------
Train 0.8938 Loss, 82.77 Acc
Validation 0.9369 Loss, 81.12 Acc


100%|██████████| 391/391 [00:36<00:00, 10.77it/s]
100%|██████████| 79/79 [00:04<00:00, 18.29it/s]


--------36----------
Train 0.8989 Loss, 82.67 Acc
Validation 0.9305 Loss, 81.13 Acc


100%|██████████| 391/391 [00:36<00:00, 10.77it/s]
100%|██████████| 79/79 [00:04<00:00, 18.30it/s]


--------37----------
Train 0.8906 Loss, 82.84 Acc
Validation 0.9267 Loss, 81.46 Acc


100%|██████████| 391/391 [00:36<00:00, 10.69it/s]
100%|██████████| 79/79 [00:04<00:00, 18.30it/s]


--------38----------
Train 0.8850 Loss, 83.00 Acc
Validation 0.9364 Loss, 80.79 Acc


100%|██████████| 391/391 [00:36<00:00, 10.78it/s]
100%|██████████| 79/79 [00:04<00:00, 18.83it/s]


--------39----------
Train 0.8843 Loss, 83.13 Acc
Validation 0.9355 Loss, 80.94 Acc


100%|██████████| 391/391 [00:36<00:00, 10.73it/s]
100%|██████████| 79/79 [00:04<00:00, 18.11it/s]


--------40----------
Train 0.8821 Loss, 83.22 Acc
Validation 0.9298 Loss, 81.45 Acc


100%|██████████| 391/391 [00:37<00:00, 10.51it/s]
100%|██████████| 79/79 [00:04<00:00, 17.86it/s]


--------41----------
Train 0.8759 Loss, 83.56 Acc
Validation 0.9276 Loss, 81.22 Acc


100%|██████████| 391/391 [00:36<00:00, 10.83it/s]
100%|██████████| 79/79 [00:04<00:00, 18.75it/s]


--------42----------
Train 0.8736 Loss, 83.80 Acc
Validation 0.9255 Loss, 81.40 Acc


100%|██████████| 391/391 [00:35<00:00, 10.97it/s]
100%|██████████| 79/79 [00:04<00:00, 18.73it/s]


--------43----------
Train 0.8714 Loss, 83.91 Acc
Validation 0.9321 Loss, 81.52 Acc


100%|██████████| 391/391 [00:35<00:00, 10.87it/s]
100%|██████████| 79/79 [00:04<00:00, 18.70it/s]


--------44----------
Train 0.8654 Loss, 84.09 Acc
Validation 0.9287 Loss, 81.26 Acc


100%|██████████| 391/391 [00:36<00:00, 10.84it/s]
100%|██████████| 79/79 [00:04<00:00, 18.38it/s]


--------45----------
Train 0.8658 Loss, 84.08 Acc
Validation 0.9334 Loss, 81.37 Acc


100%|██████████| 391/391 [00:35<00:00, 10.91it/s]
100%|██████████| 79/79 [00:04<00:00, 18.56it/s]


--------46----------
Train 0.8634 Loss, 84.16 Acc
Validation 0.9299 Loss, 81.06 Acc


100%|██████████| 391/391 [00:35<00:00, 10.88it/s]
100%|██████████| 79/79 [00:04<00:00, 18.63it/s]


--------47----------
Train 0.8592 Loss, 84.42 Acc
Validation 0.9262 Loss, 81.57 Acc


100%|██████████| 391/391 [00:36<00:00, 10.85it/s]
100%|██████████| 79/79 [00:04<00:00, 18.30it/s]


--------48----------
Train 0.8582 Loss, 84.47 Acc
Validation 0.9212 Loss, 81.47 Acc


100%|██████████| 391/391 [00:36<00:00, 10.78it/s]
100%|██████████| 79/79 [00:04<00:00, 18.30it/s]


--------49----------
Train 0.8588 Loss, 84.38 Acc
Validation 0.9221 Loss, 81.53 Acc


100%|██████████| 391/391 [00:36<00:00, 10.84it/s]
100%|██████████| 79/79 [00:04<00:00, 18.55it/s]


--------50----------
Train 0.8514 Loss, 84.75 Acc
Validation 0.9298 Loss, 81.07 Acc


100%|██████████| 391/391 [00:36<00:00, 10.82it/s]
100%|██████████| 79/79 [00:04<00:00, 18.60it/s]


--------51----------
Train 0.8492 Loss, 84.68 Acc
Validation 0.9186 Loss, 81.71 Acc


100%|██████████| 391/391 [00:36<00:00, 10.82it/s]
100%|██████████| 79/79 [00:04<00:00, 18.90it/s]


--------52----------
Train 0.8455 Loss, 84.83 Acc
Validation 0.9167 Loss, 81.53 Acc


100%|██████████| 391/391 [00:35<00:00, 10.89it/s]
100%|██████████| 79/79 [00:04<00:00, 18.44it/s]


--------53----------
Train 0.8447 Loss, 84.78 Acc
Validation 0.9129 Loss, 81.97 Acc


100%|██████████| 391/391 [00:35<00:00, 10.89it/s]
100%|██████████| 79/79 [00:04<00:00, 18.62it/s]


--------54----------
Train 0.8382 Loss, 85.21 Acc
Validation 0.9150 Loss, 82.17 Acc


100%|██████████| 391/391 [00:36<00:00, 10.86it/s]
100%|██████████| 79/79 [00:04<00:00, 18.63it/s]


--------55----------
Train 0.8399 Loss, 85.10 Acc
Validation 0.9189 Loss, 82.02 Acc


100%|██████████| 391/391 [00:32<00:00, 11.96it/s]
100%|██████████| 79/79 [00:04<00:00, 18.80it/s]


--------56----------
Train 0.8355 Loss, 85.34 Acc
Validation 0.9135 Loss, 82.05 Acc


100%|██████████| 391/391 [00:35<00:00, 10.89it/s]
100%|██████████| 79/79 [00:04<00:00, 18.45it/s]


--------57----------
Train 0.8322 Loss, 85.79 Acc
Validation 0.9047 Loss, 82.31 Acc


100%|██████████| 391/391 [00:36<00:00, 10.86it/s]
100%|██████████| 79/79 [00:04<00:00, 18.70it/s]


--------58----------
Train 0.8298 Loss, 85.65 Acc
Validation 0.9076 Loss, 82.37 Acc


100%|██████████| 391/391 [00:35<00:00, 10.97it/s]
100%|██████████| 79/79 [00:04<00:00, 18.94it/s]


--------59----------
Train 0.8296 Loss, 85.61 Acc
Validation 0.9137 Loss, 81.90 Acc


100%|██████████| 391/391 [00:36<00:00, 10.82it/s]
100%|██████████| 79/79 [00:04<00:00, 18.31it/s]


--------60----------
Train 0.8249 Loss, 85.90 Acc
Validation 0.9115 Loss, 81.91 Acc


100%|██████████| 391/391 [00:36<00:00, 10.77it/s]
100%|██████████| 79/79 [00:04<00:00, 18.31it/s]


--------61----------
Train 0.8273 Loss, 85.78 Acc
Validation 0.9074 Loss, 82.44 Acc


100%|██████████| 391/391 [00:36<00:00, 10.77it/s]
100%|██████████| 79/79 [00:04<00:00, 18.42it/s]


--------62----------
Train 0.8212 Loss, 86.00 Acc
Validation 0.9069 Loss, 82.48 Acc


100%|██████████| 391/391 [00:35<00:00, 10.98it/s]
100%|██████████| 79/79 [00:04<00:00, 17.98it/s]


--------63----------
Train 0.8216 Loss, 86.20 Acc
Validation 0.9068 Loss, 82.35 Acc


100%|██████████| 391/391 [00:36<00:00, 10.75it/s]
100%|██████████| 79/79 [00:04<00:00, 18.26it/s]


--------64----------
Train 0.8186 Loss, 86.11 Acc
Validation 0.9021 Loss, 82.64 Acc


100%|██████████| 391/391 [00:36<00:00, 10.70it/s]
100%|██████████| 79/79 [00:04<00:00, 18.27it/s]


--------65----------
Train 0.8149 Loss, 86.31 Acc
Validation 0.9134 Loss, 82.04 Acc


100%|██████████| 391/391 [00:36<00:00, 10.77it/s]
100%|██████████| 79/79 [00:04<00:00, 18.17it/s]


--------66----------
Train 0.8131 Loss, 86.50 Acc
Validation 0.8978 Loss, 82.95 Acc


100%|██████████| 391/391 [00:36<00:00, 10.71it/s]
100%|██████████| 79/79 [00:04<00:00, 18.27it/s]


--------67----------
Train 0.8127 Loss, 86.38 Acc
Validation 0.9064 Loss, 82.53 Acc


100%|██████████| 391/391 [00:36<00:00, 10.76it/s]
100%|██████████| 79/79 [00:04<00:00, 18.14it/s]


--------68----------
Train 0.8110 Loss, 86.46 Acc
Validation 0.8988 Loss, 82.99 Acc


100%|██████████| 391/391 [00:36<00:00, 10.76it/s]
100%|██████████| 79/79 [00:04<00:00, 18.28it/s]


--------69----------
Train 0.8080 Loss, 86.57 Acc
Validation 0.9209 Loss, 81.76 Acc


100%|██████████| 391/391 [00:36<00:00, 10.82it/s]
100%|██████████| 79/79 [00:04<00:00, 18.20it/s]


--------70----------
Train 0.8072 Loss, 86.68 Acc
Validation 0.9056 Loss, 82.78 Acc


100%|██████████| 391/391 [00:35<00:00, 10.90it/s]
100%|██████████| 79/79 [00:04<00:00, 18.18it/s]


--------71----------
Train 0.7860 Loss, 87.66 Acc
Validation 0.8894 Loss, 83.36 Acc


100%|██████████| 391/391 [00:36<00:00, 10.74it/s]
100%|██████████| 79/79 [00:04<00:00, 18.41it/s]


--------72----------
Train 0.7791 Loss, 87.84 Acc
Validation 0.8869 Loss, 83.51 Acc


100%|██████████| 391/391 [00:36<00:00, 10.71it/s]
100%|██████████| 79/79 [00:04<00:00, 18.34it/s]


--------73----------
Train 0.7766 Loss, 88.02 Acc
Validation 0.8831 Loss, 83.84 Acc


100%|██████████| 391/391 [00:36<00:00, 10.79it/s]
100%|██████████| 79/79 [00:04<00:00, 18.30it/s]


--------74----------
Train 0.7721 Loss, 88.33 Acc
Validation 0.8827 Loss, 83.85 Acc


100%|██████████| 391/391 [00:36<00:00, 10.58it/s]
100%|██████████| 79/79 [00:04<00:00, 18.27it/s]


--------75----------
Train 0.7707 Loss, 88.36 Acc
Validation 0.8879 Loss, 83.67 Acc


100%|██████████| 391/391 [00:37<00:00, 10.54it/s]
100%|██████████| 79/79 [00:04<00:00, 18.07it/s]


--------76----------
Train 0.7697 Loss, 88.41 Acc
Validation 0.8829 Loss, 83.88 Acc


100%|██████████| 391/391 [00:37<00:00, 10.46it/s]
100%|██████████| 79/79 [00:04<00:00, 17.87it/s]


--------77----------
Train 0.7720 Loss, 88.28 Acc
Validation 0.8894 Loss, 83.33 Acc


100%|██████████| 391/391 [00:37<00:00, 10.51it/s]
100%|██████████| 79/79 [00:04<00:00, 18.02it/s]


--------78----------
Train 0.7701 Loss, 88.52 Acc
Validation 0.8903 Loss, 83.31 Acc


100%|██████████| 391/391 [00:37<00:00, 10.51it/s]
100%|██████████| 79/79 [00:04<00:00, 17.79it/s]


--------79----------
Train 0.7675 Loss, 88.60 Acc
Validation 0.8875 Loss, 83.66 Acc


100%|██████████| 391/391 [00:36<00:00, 10.71it/s]
100%|██████████| 79/79 [00:04<00:00, 18.24it/s]


--------80----------
Train 0.7644 Loss, 88.65 Acc
Validation 0.8864 Loss, 83.48 Acc


100%|██████████| 391/391 [00:36<00:00, 10.84it/s]
100%|██████████| 79/79 [00:04<00:00, 18.59it/s]


--------81----------
Train 0.7640 Loss, 88.74 Acc
Validation 0.8901 Loss, 83.52 Acc


100%|██████████| 391/391 [00:35<00:00, 10.87it/s]
100%|██████████| 79/79 [00:04<00:00, 18.41it/s]


--------82----------
Train 0.7590 Loss, 88.86 Acc
Validation 0.8891 Loss, 83.70 Acc


100%|██████████| 391/391 [00:35<00:00, 10.95it/s]
100%|██████████| 79/79 [00:04<00:00, 18.77it/s]


--------83----------
Train 0.7601 Loss, 88.81 Acc
Validation 0.8865 Loss, 83.68 Acc


100%|██████████| 391/391 [00:35<00:00, 10.94it/s]
100%|██████████| 79/79 [00:04<00:00, 18.22it/s]


--------84----------
Train 0.7586 Loss, 89.02 Acc
Validation 0.8820 Loss, 84.03 Acc


100%|██████████| 391/391 [00:36<00:00, 10.64it/s]
100%|██████████| 79/79 [00:04<00:00, 18.52it/s]


--------85----------
Train 0.7593 Loss, 88.98 Acc
Validation 0.8865 Loss, 83.55 Acc


100%|██████████| 391/391 [00:36<00:00, 10.81it/s]
100%|██████████| 79/79 [00:04<00:00, 18.54it/s]


--------86----------
Train 0.7545 Loss, 89.24 Acc
Validation 0.8870 Loss, 83.75 Acc


100%|██████████| 391/391 [00:36<00:00, 10.71it/s]
100%|██████████| 79/79 [00:04<00:00, 18.20it/s]


--------87----------
Train 0.7573 Loss, 88.95 Acc
Validation 0.8939 Loss, 83.35 Acc


100%|██████████| 391/391 [00:36<00:00, 10.84it/s]
100%|██████████| 79/79 [00:04<00:00, 18.64it/s]


--------88----------
Train 0.7566 Loss, 88.98 Acc
Validation 0.8855 Loss, 83.70 Acc


100%|██████████| 391/391 [00:35<00:00, 10.91it/s]
100%|██████████| 79/79 [00:04<00:00, 18.72it/s]


--------89----------
Train 0.7535 Loss, 89.22 Acc
Validation 0.8913 Loss, 83.41 Acc


100%|██████████| 391/391 [00:35<00:00, 10.90it/s]
100%|██████████| 79/79 [00:04<00:00, 18.71it/s]


--------90----------
Train 0.7568 Loss, 88.90 Acc
Validation 0.8858 Loss, 83.31 Acc


100%|██████████| 391/391 [00:35<00:00, 10.86it/s]
100%|██████████| 79/79 [00:04<00:00, 18.58it/s]


--------91----------
Train 0.7545 Loss, 89.11 Acc
Validation 0.8904 Loss, 83.52 Acc


100%|██████████| 391/391 [00:36<00:00, 10.78it/s]
100%|██████████| 79/79 [00:04<00:00, 18.66it/s]


--------92----------
Train 0.7517 Loss, 89.20 Acc
Validation 0.8868 Loss, 83.95 Acc


100%|██████████| 391/391 [00:37<00:00, 10.56it/s]
100%|██████████| 79/79 [00:04<00:00, 17.93it/s]


--------93----------
Train 0.7490 Loss, 89.41 Acc
Validation 0.8878 Loss, 83.74 Acc


100%|██████████| 391/391 [00:37<00:00, 10.53it/s]
100%|██████████| 79/79 [00:04<00:00, 18.07it/s]


--------94----------
Train 0.7495 Loss, 89.36 Acc
Validation 0.8862 Loss, 83.99 Acc


100%|██████████| 391/391 [00:37<00:00, 10.47it/s]
100%|██████████| 79/79 [00:04<00:00, 18.02it/s]

--------95----------
Train 0.7486 Loss, 89.37 Acc
Validation 0.8894 Loss, 83.75 Acc





In [10]:
def _replace_relu(module: nn.Module) -> None:
    from models.layers import Quant_ReLU
    reassign = {}
    for name, mod in module.named_children():
        _replace_relu(mod)
        # Checking for explicit type instead of instance
        # as we only want to replace modules of the exact type
        # not inherited classes
        if type(mod) is nn.ReLU or type(mod) is nn.ReLU6 or type(mod) is Quant_ReLU:
            reassign[name] = nn.ReLU(inplace=False)

    for key, value in reassign.items():
        module._modules[key] = value
def fused_model(model,is_qat = None) -> None:
    from models import Conv2dNormActivation, QuantizableInvertedResidual
    for m in model.modules():
        if type(m) is Conv2dNormActivation:
            torch.ao.quantization.fuse_modules(m, ["0", "1", "2"],inplace=True)
        if type(m) is QuantizableInvertedResidual:
            m.fuse_model(is_qat)
            
tiny_mobilenet = quat_mobilenet_v2(cifar10=True)
tiny_mobilenet = tiny_mobilenet.to("cpu")
tiny_mobilenet.load_state_dict(torch.load("./models/tiny_mobilenetv2_cifar.pt"))
# from utils import custom_quant_weights
# tiny_mobilenet, _ = custom_quant_weights(tiny_mobilenet)

_,acc = Evaluating(tiny_mobilenet,Test_loader,"cpu")
print(f"Before quantization acc : {acc:.2f} %")

tiny_quant = copy.deepcopy(tiny_mobilenet)

from models import quantize_model
qconfig = torch.quantization.get_default_qconfig("fbgemm")
quantize_model(tiny_quant, data= Train_loader,qconfig=qconfig)

# torch.jit.save(torch.jit.script(tiny_quant),"./models/Q_tiny.pt")
# tiny_quant = torch.jit.load("./models/Q_tiny.pt")
_,int8_acc = Evaluating(tiny_quant,Test_loader,"cpu")
print(f"post int8_model acc :{int8_acc:.2f} %")

100%|██████████| 79/79 [00:03<00:00, 24.98it/s]


Before quantization acc : 84.03 %
Q config = QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.HistogramObserver'>, reduce_range=True){}, weight=functools.partial(<class 'torch.ao.quantization.observer.PerChannelMinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_channel_symmetric){})
calibrating...


100%|██████████| 79/79 [00:02<00:00, 28.78it/s]

post int8_model acc :83.79 %



