In [1]:
import os
import random
import sys
if os.path.dirname(os.getcwd()) not in sys.path:
    sys.path.append(os.path.dirname(os.getcwd()))
import torch
import torch.nn as nn
import torchvision

import time
import copy
import numpy as np
from torchvision import transforms
from tqdm import tqdm
from torchsummary import summary

def set_random_seeds(random_seed=0):

    torch.manual_seed(random_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(random_seed)
    random.seed(random_seed)

def memory_check():
    print(f"  Allocated: {round(torch.cuda.memory_allocated()/1024**3,2)} GB")
    print(f"  Cached:    {round(torch.cuda.memory_reserved()/1024**3,2)} GB\n")

print(f"torch = {torch.__version__}")
print(f"torchvision = {torchvision.__version__}")

torch = 1.12.1
torchvision = 0.13.1


In [2]:
from models import quat_mobilenet_v2
tiny_model = quat_mobilenet_v2(cifar10=True)
tiny_model.load_state_dict(torch.load("./models/tiny_mobilenetv2_cifar.pt"))
summary(tiny_model,(3,32,32),device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         QuantStub-1            [-1, 3, 32, 32]               0
            Conv2d-2           [-1, 32, 16, 16]             864
       BatchNorm2d-3           [-1, 32, 16, 16]              64
              ReLU-4           [-1, 32, 16, 16]               0
            Conv2d-5           [-1, 32, 16, 16]             288
       BatchNorm2d-6           [-1, 32, 16, 16]              64
              ReLU-7           [-1, 32, 16, 16]               0
            Conv2d-8           [-1, 16, 16, 16]             512
       BatchNorm2d-9           [-1, 16, 16, 16]              32
QuantizableInvertedResidual-10           [-1, 16, 16, 16]               0
           Conv2d-11           [-1, 64, 16, 16]           1,024
      BatchNorm2d-12           [-1, 64, 16, 16]             128
             ReLU-13           [-1, 64, 16, 16]               0
           Conv2d-14         

In [3]:
tiny_model.to("cpu")
with torch.no_grad():
    total_tensor = torch.tensor([])
    state = tiny_model.state_dict()
    for i in state.keys():
        new_param = state[i].view(-1)
        total_tensor = torch.cat((total_tensor,new_param),0)
        
    print(total_tensor.shape)
    total_tensor,_ = total_tensor.sort()
    print(f"Max {torch.max(total_tensor)}")
    print(f"Min {torch.min(total_tensor)}")
    number = int(len(total_tensor)*0.01)
    print(number)
    
    M = total_tensor[-number]
    m = total_tensor[number]
    print(M,m)
    print(total_tensor[:20])

    

torch.Size([670508])
Max 32846.0
Min -0.9091643691062927
6705
tensor(0.5107) tensor(-0.1579)
tensor([-0.9092, -0.8394, -0.8371, -0.7841, -0.7512, -0.7318, -0.7111, -0.7075,
        -0.7015, -0.6907, -0.6876, -0.6641, -0.6532, -0.6381, -0.6341, -0.6314,
        -0.6302, -0.6222, -0.6174, -0.6107])


In [4]:
with torch.no_grad():
    new_M = 0
    new_m = 0 
    for name, param in tiny_model.named_parameters():
        new_param = param.clamp(m,M)
        new_param = torch.round(254*(new_param-m)/(M-m)-127)
        new_param = new_param/1000
        
        new_M = max(new_M,torch.max(new_param))
        new_m = max(new_m,torch.max(new_param))
    print(new_M,new_m)


tensor(0.1270) tensor(0.1270)


In [8]:
from utils import Train
train_dataset, test_dataset = Train.Cifar10_Dataloader(quantize=True,only_dataset=True)


Files already downloaded and verified
Files already downloaded and verified
Train data set = 50000, Test = 10000


In [9]:
fuse_model = copy.deepcopy(tiny_model)
fuse_model.fuse_model()
state = fuse_model.state_dict()
total_tensor = torch.tensor([])
for i in state.keys():
    new_param = state[i].view(-1)
    total_tensor = torch.cat((total_tensor,new_param),0)
length = len(total_tensor)
total_tensor,_ = total_tensor.sort()
print(length)
print(torch.max(total_tensor), torch.min(total_tensor))
print(total_tensor[:20])
print(total_tensor[-20:])

670508
tensor(32846.) tensor(-0.9092)
tensor([-0.9092, -0.8394, -0.8371, -0.7841, -0.7512, -0.7318, -0.7111, -0.7075,
        -0.7015, -0.6907, -0.6876, -0.6641, -0.6532, -0.6381, -0.6341, -0.6314,
        -0.6302, -0.6222, -0.6174, -0.6107])
tensor([32846., 32846., 32846., 32846., 32846., 32846., 32846., 32846., 32846.,
        32846., 32846., 32846., 32846., 32846., 32846., 32846., 32846., 32846.,
        32846., 32846.])


In [4]:
from models import Quant_ReLU
from models import quat_mobilenet_v2
import torch
test_model = quat_mobilenet_v2(cifar10=True, activation_layer = Quant_ReLU)
x = torch.rand(1,3,32,32)
print(x.shape)
print(x.dim)
test_model.eval()
test_model.to('cpu')
y = test_model(x,check=True)

torch.Size([1, 3, 32, 32])
<built-in method dim of Tensor object at 0x7f734a4deb30>
Before quant torch.Size([1, 3, 32, 32])
tensor([[[[0.7015, 0.0094, 0.6844,  ..., 0.9708, 0.9056, 0.4092],
          [0.4952, 0.8349, 0.2106,  ..., 0.1093, 0.3152, 0.3218],
          [0.9619, 0.2924, 0.7515,  ..., 0.0749, 0.6031, 0.9446],
          ...,
          [0.6778, 0.7219, 0.6975,  ..., 0.5404, 0.7204, 0.2027],
          [0.4839, 0.5764, 0.8945,  ..., 0.7231, 0.9630, 0.8795],
          [0.2107, 0.4932, 0.2533,  ..., 0.0445, 0.0238, 0.9571]],

         [[0.3503, 0.7456, 0.3864,  ..., 0.5104, 0.3343, 0.2511],
          [0.6302, 0.4099, 0.0470,  ..., 0.6817, 0.0322, 0.0287],
          [0.2448, 0.3077, 0.5786,  ..., 0.6606, 0.3641, 0.2507],
          ...,
          [0.2450, 0.5552, 0.5145,  ..., 0.0156, 0.4594, 0.5732],
          [0.6668, 0.1426, 0.4182,  ..., 0.2259, 0.1909, 0.5003],
          [0.1236, 0.4521, 0.3786,  ..., 0.4592, 0.4758, 0.9741]],

         [[0.6547, 0.3612, 0.9795,  ..., 0.2076, 0

In [6]:
from models import quat_mobilenet_v2,mobilenet_v2
from models import Quant_ReLU
from torchsummary import summary
from utils import Data
from utils import Train
import torch
import numpy as np
from tqdm import tqdm
from utils.Train import Evaluating
from utils.Train import custom_quant_weights,custom_dequant_weights
from models.mobilenetv2 import replace_Qrelu, replace_relu, MobileNet_V2_Weights
from utils import set_random_seeds
# device 
if torch.cuda.is_available():
    gpu_device = torch.device("cuda")
cpu_device = torch.device("cpu")

# set random 
set_random_seeds(42)

# model load
model = quat_mobilenet_v2(weights=MobileNet_V2_Weights.IMAGENET1K_V1,activation_layer=torch.nn.ReLU)
model.classifier.append(torch.nn.Dropout(0.2))
model.classifier.append(torch.nn.Linear(1000, 10))
model.load_state_dict(torch.load("./models/q_mobilenetv2_cifar10.pt"))
# model = mobilenet_v2(cifar10=True)
# summary(model,(3,32,32),device="cpu")
# data load
train_loader, test_loader = Data.Cifar10_Dataloader()


Files already downloaded and verified
Files already downloaded and verified
Train data set = 50000, Test = 10000


In [7]:

# optimizer 
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6, momentum=0.9, weight_decay=5e-4)

# scheduler 
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20,60,90], gamma=0.5)

# train model
criterion = torch.nn.CrossEntropyLoss(label_smoothing=0.0)
count = 0
best_loss = np.Inf
# Training
model.to(gpu_device)
val_loss, val_acc = Evaluating(model,test_loader,device=gpu_device,criterion=criterion)
print("Before Training")
print(f"Validation {val_loss:.4f} Loss, {val_acc:.2f} Acc")

# with torch.autograd.set_detect_anomaly(True):
for epoch in range(100):

    running_loss = 0
    running_corrects = 0
    model.train()
    for inputs, labels in tqdm(iter(train_loader),leave=False):
        inputs = inputs.to(gpu_device)
        labels = labels.to(gpu_device)
        # input 
        check = []
        for i in range(inputs.size(0)):
            temp = inputs.clone()
            M = torch.max(temp[i])
            m = torch.min(temp[i])
            check.append([m,M])
            inputs[i] = torch.round(254*(inputs[i]-m)/(M-m)-127)/1000
        
        for i in range(inputs.size(0)):
            m = check[i][0]
            M = check[i][1]
            inputs[i] = (1000*inputs[i]+127)*(M-m)/254+m

        optimizer.zero_grad()
        # forward + backward + optimize
        # model,backup = custom_quant_weights(model)
        # model = custom_dequant_weights(model,backup)
        outputs = model(inputs)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        _, preds = torch.max(outputs, 1)
        # statistics
        running_loss += loss.item() * labels.size(0)
        running_corrects += (preds == labels).sum().item()
        
    # Set learning rate scheduler
    if scheduler is not None:
        scheduler.step()
    train_loss = running_loss / len(train_loader.dataset)
    train_accuracy = 100 * running_corrects / len(train_loader.dataset) 

    # Evaluation
    val_loss, val_acc = Evaluating(model,test_loader,device=gpu_device,criterion=criterion)
    print(f"--------{epoch+1}----------")
    print(f"Train {train_loss:.4f} Loss, {train_accuracy:.2f} Acc")
    print(f"Validation {val_loss:.4f} Loss, {val_acc:.2f} Acc")
    if best_loss > val_loss:
        best_loss = val_loss
        count = 0
        torch.save(model.state_dict(), f"./models/test.pt")
    else:
        count +=1
        if count > 10:
            break

                                               

Before Training
Validation 0.3677 Loss, 90.04 Acc


                                                 

--------1----------
Train 0.2917 Loss, 92.90 Acc
Validation 0.3707 Loss, 89.69 Acc


                                                 

--------2----------
Train 0.2849 Loss, 92.93 Acc
Validation 0.3590 Loss, 90.00 Acc


                                                 

--------3----------
Train 0.2716 Loss, 93.20 Acc
Validation 0.3545 Loss, 89.84 Acc


                                                 

--------4----------
Train 0.2685 Loss, 93.07 Acc
Validation 0.3501 Loss, 89.84 Acc


                                                 

--------5----------
Train 0.2646 Loss, 92.97 Acc
Validation 0.3442 Loss, 89.85 Acc


                                                 

--------6----------
Train 0.2529 Loss, 93.22 Acc
Validation 0.3408 Loss, 89.85 Acc


                                                 

--------7----------
Train 0.2500 Loss, 93.03 Acc
Validation 0.3382 Loss, 89.98 Acc


                                                 

--------8----------
Train 0.2469 Loss, 92.92 Acc
Validation 0.3360 Loss, 89.96 Acc


                                                 

--------9----------
Train 0.2410 Loss, 93.08 Acc
Validation 0.3369 Loss, 89.80 Acc


                                                 

--------10----------
Train 0.2419 Loss, 92.94 Acc
Validation 0.3293 Loss, 89.95 Acc


                                                 

--------11----------
Train 0.2371 Loss, 93.05 Acc
Validation 0.3296 Loss, 89.89 Acc


                                                 

--------12----------
Train 0.2356 Loss, 92.96 Acc
Validation 0.3303 Loss, 89.81 Acc


                                                 

--------13----------
Train 0.2319 Loss, 93.14 Acc
Validation 0.3300 Loss, 89.81 Acc


                                                 

--------14----------
Train 0.2276 Loss, 93.17 Acc
Validation 0.3258 Loss, 89.94 Acc


                                                 

--------15----------
Train 0.2248 Loss, 93.20 Acc
Validation 0.3269 Loss, 89.81 Acc


                                                 

--------16----------
Train 0.2273 Loss, 92.99 Acc
Validation 0.3233 Loss, 89.96 Acc


                                                 

--------17----------
Train 0.2248 Loss, 93.09 Acc
Validation 0.3248 Loss, 90.00 Acc


                                                 

--------18----------
Train 0.2234 Loss, 93.03 Acc
Validation 0.3238 Loss, 89.98 Acc


                                                 

--------19----------
Train 0.2222 Loss, 93.02 Acc
Validation 0.3251 Loss, 90.01 Acc


                                                 

--------20----------
Train 0.2177 Loss, 93.22 Acc
Validation 0.3226 Loss, 90.07 Acc


                                                 

--------21----------
Train 0.2189 Loss, 93.06 Acc
Validation 0.3270 Loss, 89.99 Acc


                                                 

--------22----------
Train 0.2181 Loss, 93.09 Acc
Validation 0.3251 Loss, 89.87 Acc


                                                 

--------23----------
Train 0.2203 Loss, 92.99 Acc
Validation 0.3225 Loss, 89.87 Acc


                                                 

--------24----------
Train 0.2173 Loss, 93.23 Acc
Validation 0.3240 Loss, 89.92 Acc


                                                 

--------25----------
Train 0.2173 Loss, 93.07 Acc
Validation 0.3212 Loss, 90.02 Acc


                                                 

--------26----------
Train 0.2146 Loss, 93.21 Acc
Validation 0.3274 Loss, 89.99 Acc


                                                 

--------27----------
Train 0.2171 Loss, 93.09 Acc
Validation 0.3203 Loss, 90.12 Acc


                                                 

--------28----------
Train 0.2161 Loss, 93.19 Acc
Validation 0.3275 Loss, 89.80 Acc


                                                 

--------29----------
Train 0.2128 Loss, 93.13 Acc
Validation 0.3228 Loss, 90.04 Acc


                                                 

--------30----------
Train 0.2141 Loss, 93.16 Acc
Validation 0.3216 Loss, 90.04 Acc


                                                 

--------31----------
Train 0.2141 Loss, 93.14 Acc
Validation 0.3231 Loss, 89.87 Acc


                                                 

--------32----------
Train 0.2153 Loss, 93.14 Acc
Validation 0.3241 Loss, 89.87 Acc


                                                 

--------33----------
Train 0.2125 Loss, 93.17 Acc
Validation 0.3239 Loss, 89.96 Acc


                                                 

--------34----------
Train 0.2131 Loss, 93.20 Acc
Validation 0.3302 Loss, 89.89 Acc


                                                 

--------35----------
Train 0.2113 Loss, 93.17 Acc
Validation 0.3230 Loss, 90.01 Acc


                                                 

--------36----------
Train 0.2129 Loss, 93.14 Acc
Validation 0.3228 Loss, 90.08 Acc


                                                 

--------37----------
Train 0.2141 Loss, 93.05 Acc
Validation 0.3229 Loss, 90.03 Acc


                                                 

--------38----------
Train 0.2095 Loss, 93.28 Acc
Validation 0.3279 Loss, 89.87 Acc




In [8]:
from models import quantize_model,quat_mobilenet_v2,MobileNet_V2_Weights
import copy
import torch
train_loader, test_loader = Data.Cifar10_Dataloader()
model = quat_mobilenet_v2(weights=MobileNet_V2_Weights.IMAGENET1K_V1,activation_layer=torch.nn.ReLU)
model.classifier.append(torch.nn.Dropout(0.2))
model.classifier.append(torch.nn.Linear(1000, 10))
model.to('cpu')
# model.load_state_dict(torch.load("./models/input_q_mobilenetv2_cifar10.pt"))
model.load_state_dict(torch.load("./models/test.pt"))
quat_model = copy.deepcopy(model)
qconfig = torch.quantization.get_default_qconfig("fbgemm")
quantize_model(quat_model, data= train_loader,qconfig=qconfig)
_,int8_acc = Evaluating(quat_model,test_loader,"cpu")
print(f"post int8_model acc :{int8_acc:.2f} %")


Files already downloaded and verified
Files already downloaded and verified
Train data set = 50000, Test = 10000
Q config = QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.HistogramObserver'>, reduce_range=True){}, weight=functools.partial(<class 'torch.ao.quantization.observer.PerChannelMinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_channel_symmetric){})
calibrating...


                                                 

post int8_model acc :88.03 %




In [5]:

model.load_state_dict(torch.load("./models/q_mobilenetv2_cifar10.pt"))
quat_model = copy.deepcopy(model)
quantize_model(quat_model, data= train_loader)
_,int8_acc = Evaluating(quat_model,test_loader,"cpu")
print(f"post int8_model acc :{int8_acc:.2f} %")

Q config = QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.MinMaxObserver'>, quant_min=0, quant_max=127){}, weight=functools.partial(<class 'torch.ao.quantization.observer.PerChannelMinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_channel_symmetric){})
calibrating...


                                               

post int8_model acc :61.87 %




In [9]:
from models import quat_mobilenet_v2,mobilenet_v2
from models import Quant_ReLU
from torchsummary import summary
from utils import Data
from utils import Train
import torch
import numpy as np
from tqdm import tqdm
from utils.Train import Evaluating
from utils.Train import custom_quant_weights,custom_dequant_weights
from models.mobilenetv2 import replace_Qrelu, replace_relu, MobileNet_V2_Weights
from utils import set_random_seeds
# device 
if torch.cuda.is_available():
    gpu_device = torch.device("cuda")
cpu_device = torch.device("cpu")

# set random 
set_random_seeds(42)

# model load
model = quat_mobilenet_v2(weights=MobileNet_V2_Weights.IMAGENET1K_V1,activation_layer=torch.nn.ReLU)
model.classifier.append(torch.nn.Dropout(0.2))
model.classifier.append(torch.nn.Linear(1000, 10))
model.load_state_dict(torch.load("./models/q_mobilenetv2_cifar10.pt"))
# model = mobilenet_v2(cifar10=True)
# summary(model,(3,32,32),device="cpu")
# data load
train_loader, test_loader = Data.Cifar10_Dataloader()


Files already downloaded and verified
Files already downloaded and verified
Train data set = 50000, Test = 10000


In [10]:

# optimizer 
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6, momentum=0.9, weight_decay=5e-4)

# scheduler 
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20,60,90], gamma=0.5)

# train model
criterion = torch.nn.CrossEntropyLoss(label_smoothing=0.0)
count = 0
best_loss = np.Inf
# Training
model.to(gpu_device)
val_loss, val_acc = Evaluating(model,test_loader,device=gpu_device,criterion=criterion)
print("Before Training")
print(f"Validation {val_loss:.4f} Loss, {val_acc:.2f} Acc")

# with torch.autograd.set_detect_anomaly(True):
for epoch in range(100):

    running_loss = 0
    running_corrects = 0
    model.train()
    for inputs, labels in tqdm(iter(train_loader),leave=False):
        inputs = inputs.to(gpu_device)
        labels = labels.to(gpu_device)
        # input 
        # with torch.no_grad():
        #     check = []
        #     for i in range(inputs.size(0)):
        #         temp = inputs.clone()
        #         M = torch.max(temp[i])
        #         m = torch.min(temp[i])
        #         check.append([m,M])
        #         inputs[i] = torch.round(254*(inputs[i]-m)/(M-m)-127)/1000
            
        #     for i in range(inputs.size(0)):
        #         m = check[i][0]
        #         M = check[i][1]
        #         inputs[i] = (1000*inputs[i]+127)*(M-m)/254+m

        optimizer.zero_grad()
        # forward + backward + optimize
        model,backup = custom_quant_weights(model)
        model = custom_dequant_weights(model,backup)
        outputs = model(inputs)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        _, preds = torch.max(outputs, 1)
        # statistics
        running_loss += loss.item() * labels.size(0)
        running_corrects += (preds == labels).sum().item()
        
    # Set learning rate scheduler
    if scheduler is not None:
        scheduler.step()
    train_loss = running_loss / len(train_loader.dataset)
    train_accuracy = 100 * running_corrects / len(train_loader.dataset) 

    # Evaluation
    val_loss, val_acc = Evaluating(model,test_loader,device=gpu_device,criterion=criterion)
    print(f"--------{epoch+1}----------")
    print(f"Train {train_loss:.4f} Loss, {train_accuracy:.2f} Acc")
    print(f"Validation {val_loss:.4f} Loss, {val_acc:.2f} Acc")
    if best_loss > val_loss:
        best_loss = val_loss
        count = 0
        torch.save(model.state_dict(), f"./models/test.pt")
    else:
        count +=1
        if count > 10:
            break

                                               

Before Training
Validation 0.3677 Loss, 90.04 Acc


                                                 

--------1----------
Train 0.2984 Loss, 92.91 Acc
Validation 0.3794 Loss, 89.71 Acc


                                                 

--------2----------
Train 0.3005 Loss, 92.85 Acc
Validation 0.3739 Loss, 89.89 Acc


                                                 

--------3----------
Train 0.2955 Loss, 93.04 Acc
Validation 0.3719 Loss, 89.77 Acc


                                                 

--------4----------
Train 0.2990 Loss, 92.94 Acc
Validation 0.3768 Loss, 89.69 Acc


                                                 

--------5----------
Train 0.3024 Loss, 92.81 Acc
Validation 0.3735 Loss, 89.72 Acc


                                                 

--------6----------
Train 0.2965 Loss, 93.12 Acc
Validation 0.3759 Loss, 89.77 Acc


                                                 

--------7----------
Train 0.2979 Loss, 92.91 Acc
Validation 0.3750 Loss, 89.76 Acc


                                                 

--------8----------
Train 0.2989 Loss, 92.85 Acc
Validation 0.3744 Loss, 89.79 Acc


                                                 

--------9----------
Train 0.2977 Loss, 92.93 Acc
Validation 0.3754 Loss, 89.71 Acc


                                                 

--------10----------
Train 0.3016 Loss, 92.80 Acc
Validation 0.3727 Loss, 89.82 Acc


                                                 

--------11----------
Train 0.3008 Loss, 92.91 Acc
Validation 0.3745 Loss, 89.82 Acc


                                                 

--------12----------
Train 0.3024 Loss, 92.77 Acc
Validation 0.3762 Loss, 89.67 Acc


                                                 

--------13----------
Train 0.3009 Loss, 92.92 Acc
Validation 0.3746 Loss, 89.73 Acc


                                                 

--------14----------
Train 0.2997 Loss, 93.00 Acc
Validation 0.3731 Loss, 89.77 Acc




In [13]:
from models import quantize_model,quat_mobilenet_v2,MobileNet_V2_Weights
import copy
import torch
from utils import Data,Evaluating
train_loader, test_loader = Data.Cifar10_Dataloader()
model = quat_mobilenet_v2(weights=MobileNet_V2_Weights.IMAGENET1K_V1,activation_layer=torch.nn.ReLU)
model.classifier.append(torch.nn.Dropout(0.2))
model.classifier.append(torch.nn.Linear(1000, 10))
model.to('cpu')
model.load_state_dict(torch.load("./models/weight_q_mobilenetv2_cifar10.pt"))
quat_model = copy.deepcopy(model)
qconfig = torch.ao.quantization.QConfig(  # type: ignore[assignment]
                activation=torch.ao.quantization.default_histogram_observer, weight=torch.ao.quantization.weight_observer_range_neg_127_to_127
            )
quantize_model(quat_model, data= train_loader,qconfig=qconfig)
_,int8_acc = Evaluating(quat_model,test_loader,"cpu")
print(f"post int8_model acc :{int8_acc:.2f} %")


Files already downloaded and verified
Files already downloaded and verified
Train data set = 50000, Test = 10000
Q config = QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.HistogramObserver'>, quant_min=0, quant_max=127){}, weight=functools.partial(<class 'torch.ao.quantization.observer.MinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_tensor_symmetric, quant_min=-127, quant_max=127, eps=0.000244140625){})
calibrating...


                                               

post int8_model acc :80.38 %




In [3]:
# qconfig = torch.quantization.get_default_qconfig("fbgemm")
qconfig = torch.ao.quantization.QConfig(  # type: ignore[assignment]
                activation=torch.ao.quantization.default_histogram_observer, weight=torch.ao.quantization.weight_observer_range_neg_127_to_127
            )
print(qconfig)

model.load_state_dict(torch.load("./models/q_mobilenetv2_cifar10.pt"))
quat_model = copy.deepcopy(model)
quantize_model(quat_model, data= train_loader,qconfig=qconfig)
_,int8_acc = Evaluating(quat_model,test_loader,"cpu")
print(f"post int8_model acc :{int8_acc:.2f} %")

QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.HistogramObserver'>, quant_min=0, quant_max=127){}, weight=functools.partial(<class 'torch.ao.quantization.observer.MinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_tensor_symmetric, quant_min=-127, quant_max=127, eps=0.000244140625){})
Q config = QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.HistogramObserver'>, quant_min=0, quant_max=127){}, weight=functools.partial(<class 'torch.ao.quantization.observer.MinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_tensor_symmetric, quant_min=-127, quant_max=127, eps=0.000244140625){})
calibrating...


                                               

post int8_model acc :87.49 %




In [4]:
from models import quat_mobilenet_v2,mobilenet_v2
from models import Quant_ReLU
from torchsummary import summary
from utils import Data
from utils import Train
import torch
import numpy as np
from tqdm import tqdm
from utils.Train import Evaluating
from utils.Train import custom_quant_weights,custom_dequant_weights
from models.mobilenetv2 import replace_Qrelu, replace_relu, MobileNet_V2_Weights
from utils import set_random_seeds
# device 
if torch.cuda.is_available():
    gpu_device = torch.device("cuda")
cpu_device = torch.device("cpu")

# set random 
set_random_seeds(42)

# model load
model = quat_mobilenet_v2(weights=MobileNet_V2_Weights.IMAGENET1K_V1,activation_layer=torch.nn.ReLU)
model.classifier.append(torch.nn.Dropout(0.2))
model.classifier.append(torch.nn.Linear(1000, 10))
model.load_state_dict(torch.load("./models/mobilenetv2_cifar10.pt"))
# model = mobilenet_v2(cifar10=True)
# summary(model,(3,32,32),device="cpu")
# data load
train_loader, test_loader = Data.Cifar10_Dataloader()

# optimizer 
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5, momentum=0.9, weight_decay=5e-4)

# scheduler 
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20,60,90], gamma=0.5)

# train model
criterion = torch.nn.CrossEntropyLoss(label_smoothing=0.0)
count = 0
best_loss = np.Inf
# Training
model.to(gpu_device)
val_loss, val_acc = Evaluating(model,test_loader,device=gpu_device,criterion=criterion)
print("Before Training")
print(f"Validation {val_loss:.4f} Loss, {val_acc:.2f} Acc")

# with torch.autograd.set_detect_anomaly(True):
for epoch in range(100):

    running_loss = 0
    running_corrects = 0
    model.train()
    for inputs, labels in tqdm(iter(train_loader),leave=False):
        inputs = inputs.to(gpu_device)
        labels = labels.to(gpu_device)
        # input 
        with torch.no_grad():
            check = []
            for i in range(inputs.size(0)):
                temp = inputs.clone()
                M = torch.max(temp[i])
                m = torch.min(temp[i])
                check.append([m,M])
                inputs[i] = torch.round(254*(inputs[i]-m)/(M-m)-127)/1000
            
            for i in range(inputs.size(0)):
                m = check[i][0]
                M = check[i][1]
                inputs[i] = (1000*inputs[i]+127)*(M-m)/254+m

        optimizer.zero_grad()
        # forward + backward + optimize
        model,backup = custom_quant_weights(model)
        model = custom_dequant_weights(model,backup)
        outputs = model(inputs)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        _, preds = torch.max(outputs, 1)
        # statistics
        running_loss += loss.item() * labels.size(0)
        running_corrects += (preds == labels).sum().item()
        
    # Set learning rate scheduler
    if scheduler is not None:
        scheduler.step()
    train_loss = running_loss / len(train_loader.dataset)
    train_accuracy = 100 * running_corrects / len(train_loader.dataset) 

    # Evaluation
    val_loss, val_acc = Evaluating(model,test_loader,device=gpu_device,criterion=criterion)
    print(f"--------{epoch+1}----------")
    print(f"Train {train_loss:.4f} Loss, {train_accuracy:.2f} Acc")
    print(f"Validation {val_loss:.4f} Loss, {val_acc:.2f} Acc")
    if best_loss > val_loss:
        best_loss = val_loss
        count = 0
        torch.save(model.state_dict(), f"./models/input_weight_q_mobilenetv2_cifar10.pt")
    else:
        count +=1
        if count > 10:
            break

Files already downloaded and verified
Files already downloaded and verified
Train data set = 50000, Test = 10000


                                               

Before Training
Validation 0.3452 Loss, 90.60 Acc


                                                 

--------1----------
Train 0.2269 Loss, 95.79 Acc
Validation 0.3494 Loss, 90.66 Acc


                                                 

--------2----------
Train 0.2288 Loss, 95.59 Acc
Validation 0.3479 Loss, 90.70 Acc


                                                 

--------3----------
Train 0.2229 Loss, 95.74 Acc
Validation 0.3495 Loss, 90.73 Acc


                                                 

--------4----------
Train 0.2228 Loss, 95.73 Acc
Validation 0.3458 Loss, 90.63 Acc


                                                 

--------5----------
Train 0.2221 Loss, 95.68 Acc
Validation 0.3480 Loss, 90.65 Acc


                                                 

--------6----------
Train 0.2185 Loss, 95.74 Acc
Validation 0.3463 Loss, 90.69 Acc


                                                 

--------7----------
Train 0.2165 Loss, 95.79 Acc
Validation 0.3426 Loss, 90.62 Acc


                                                 

--------8----------
Train 0.2178 Loss, 95.69 Acc
Validation 0.3440 Loss, 90.61 Acc


                                                 

--------9----------
Train 0.2141 Loss, 95.76 Acc
Validation 0.3455 Loss, 90.58 Acc


                                                 

--------10----------
Train 0.2162 Loss, 95.49 Acc
Validation 0.3388 Loss, 90.56 Acc


                                                 

--------11----------
Train 0.2150 Loss, 95.72 Acc
Validation 0.3396 Loss, 90.60 Acc


                                                 

--------12----------
Train 0.2141 Loss, 95.53 Acc
Validation 0.3439 Loss, 90.52 Acc


                                                 

--------13----------
Train 0.2111 Loss, 95.61 Acc
Validation 0.3423 Loss, 90.60 Acc


                                                 

--------14----------
Train 0.2089 Loss, 95.61 Acc
Validation 0.3342 Loss, 90.56 Acc


                                                 

--------15----------
Train 0.2059 Loss, 95.65 Acc
Validation 0.3401 Loss, 90.51 Acc


                                                 

--------16----------
Train 0.2089 Loss, 95.55 Acc
Validation 0.3352 Loss, 90.58 Acc


                                                 

--------17----------
Train 0.2061 Loss, 95.50 Acc
Validation 0.3381 Loss, 90.51 Acc


                                                 

--------18----------
Train 0.2039 Loss, 95.51 Acc
Validation 0.3372 Loss, 90.35 Acc


                                                 

--------19----------
Train 0.2027 Loss, 95.59 Acc
Validation 0.3371 Loss, 90.33 Acc


                                                 

--------20----------
Train 0.2005 Loss, 95.49 Acc
Validation 0.3334 Loss, 90.43 Acc


                                                 

--------21----------
Train 0.1991 Loss, 95.52 Acc
Validation 0.3350 Loss, 90.39 Acc


                                                 

--------22----------
Train 0.2010 Loss, 95.53 Acc
Validation 0.3351 Loss, 90.50 Acc


                                                 

--------23----------
Train 0.2040 Loss, 95.29 Acc
Validation 0.3309 Loss, 90.41 Acc


                                                 

--------24----------
Train 0.1974 Loss, 95.52 Acc
Validation 0.3311 Loss, 90.47 Acc


                                                 

--------25----------
Train 0.1978 Loss, 95.50 Acc
Validation 0.3281 Loss, 90.41 Acc


                                                 

--------26----------
Train 0.1964 Loss, 95.37 Acc
Validation 0.3308 Loss, 90.45 Acc


                                                 

--------27----------
Train 0.1964 Loss, 95.40 Acc
Validation 0.3278 Loss, 90.53 Acc


                                                 

--------28----------
Train 0.1950 Loss, 95.51 Acc
Validation 0.3336 Loss, 90.35 Acc


                                                 

--------29----------
Train 0.1951 Loss, 95.40 Acc
Validation 0.3293 Loss, 90.33 Acc


                                                 

--------30----------
Train 0.1918 Loss, 95.48 Acc
Validation 0.3276 Loss, 90.49 Acc


                                                 

--------31----------
Train 0.1928 Loss, 95.37 Acc
Validation 0.3292 Loss, 90.46 Acc


                                                 

--------32----------
Train 0.1924 Loss, 95.42 Acc
Validation 0.3284 Loss, 90.39 Acc


                                                 

--------33----------
Train 0.1922 Loss, 95.31 Acc
Validation 0.3279 Loss, 90.31 Acc


                                                 

--------34----------
Train 0.1909 Loss, 95.41 Acc
Validation 0.3329 Loss, 90.31 Acc


                                                 

--------35----------
Train 0.1902 Loss, 95.41 Acc
Validation 0.3268 Loss, 90.49 Acc


                                                 

--------36----------
Train 0.1908 Loss, 95.42 Acc
Validation 0.3269 Loss, 90.46 Acc


                                                 

--------37----------
Train 0.1888 Loss, 95.38 Acc
Validation 0.3260 Loss, 90.50 Acc


                                                 

--------38----------
Train 0.1854 Loss, 95.56 Acc
Validation 0.3306 Loss, 90.25 Acc


                                                 

--------39----------
Train 0.1895 Loss, 95.20 Acc
Validation 0.3302 Loss, 90.37 Acc


                                                 

--------40----------
Train 0.1859 Loss, 95.33 Acc
Validation 0.3235 Loss, 90.37 Acc


                                                 

--------41----------
Train 0.1841 Loss, 95.38 Acc
Validation 0.3254 Loss, 90.28 Acc


                                                 

--------42----------
Train 0.1867 Loss, 95.31 Acc
Validation 0.3285 Loss, 90.35 Acc


                                                 

--------43----------
Train 0.1858 Loss, 95.27 Acc
Validation 0.3221 Loss, 90.40 Acc


                                                 

--------44----------
Train 0.1854 Loss, 95.29 Acc
Validation 0.3254 Loss, 90.28 Acc


                                                 

--------45----------
Train 0.1802 Loss, 95.35 Acc
Validation 0.3260 Loss, 90.32 Acc


                                                 

--------46----------
Train 0.1813 Loss, 95.38 Acc
Validation 0.3237 Loss, 90.40 Acc


                                                 

--------47----------
Train 0.1814 Loss, 95.25 Acc
Validation 0.3220 Loss, 90.29 Acc


                                                 

--------48----------
Train 0.1801 Loss, 95.28 Acc
Validation 0.3210 Loss, 90.48 Acc


                                                 

--------49----------
Train 0.1821 Loss, 95.12 Acc
Validation 0.3231 Loss, 90.34 Acc


                                                 

--------50----------
Train 0.1783 Loss, 95.30 Acc
Validation 0.3217 Loss, 90.38 Acc


                                                 

--------51----------
Train 0.1802 Loss, 95.29 Acc
Validation 0.3202 Loss, 90.53 Acc


                                                 

--------52----------
Train 0.1773 Loss, 95.29 Acc
Validation 0.3215 Loss, 90.36 Acc


                                                 

--------53----------
Train 0.1797 Loss, 95.15 Acc
Validation 0.3232 Loss, 90.33 Acc


                                                 

--------54----------
Train 0.1772 Loss, 95.22 Acc
Validation 0.3214 Loss, 90.45 Acc


                                                 

--------55----------
Train 0.1755 Loss, 95.18 Acc
Validation 0.3204 Loss, 90.35 Acc


                                                 

--------56----------
Train 0.1789 Loss, 95.15 Acc
Validation 0.3247 Loss, 90.29 Acc


                                                 

--------57----------
Train 0.1766 Loss, 95.20 Acc
Validation 0.3228 Loss, 90.38 Acc


                                                 

--------58----------
Train 0.1765 Loss, 95.15 Acc
Validation 0.3335 Loss, 90.22 Acc


                                                 

--------59----------
Train 0.1729 Loss, 95.20 Acc
Validation 0.3274 Loss, 90.28 Acc


                                                 

--------60----------
Train 0.1742 Loss, 95.21 Acc
Validation 0.3240 Loss, 90.33 Acc


                                                 

--------61----------
Train 0.1775 Loss, 95.18 Acc
Validation 0.3214 Loss, 90.44 Acc


                                                 

--------62----------
Train 0.1736 Loss, 95.23 Acc
Validation 0.3212 Loss, 90.38 Acc




In [15]:
# qconfig = torch.quantization.get_default_qconfig("fbgemm")
qconfig = torch.quantization.get_default_qconfig("fbgemm")
print(qconfig)
model.to(cpu_device)

print("post")
model.load_state_dict(torch.load("./models/weights/mobilenetv2_cifar10.pt"))
quat_model = copy.deepcopy(model)
quantize_model(quat_model, data= train_loader,qconfig=qconfig)
_,int8_acc = Evaluating(quat_model,test_loader,"cpu")
print(f"post int8_model acc :{int8_acc:.2f} %")

print("input - weight qat")
model.load_state_dict(torch.load("./models/weights/q_mobilenetv2_cifar10.pt"))
quat_model = copy.deepcopy(model)
quantize_model(quat_model, data= train_loader,qconfig=qconfig)
_,int8_acc = Evaluating(quat_model,test_loader,"cpu")
print(f"post int8_model acc :{int8_acc:.2f} %")



QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.HistogramObserver'>, reduce_range=True){}, weight=functools.partial(<class 'torch.ao.quantization.observer.PerChannelMinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_channel_symmetric){})
post
Q config = QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.HistogramObserver'>, reduce_range=True){}, weight=functools.partial(<class 'torch.ao.quantization.observer.PerChannelMinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_channel_symmetric){})
calibrating...


                                               

post int8_model acc :82.71 %
input - weight qat
Q config = QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.HistogramObserver'>, reduce_range=True){}, weight=functools.partial(<class 'torch.ao.quantization.observer.PerChannelMinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_channel_symmetric){})
calibrating...


                                                 

post int8_model acc :83.62 %


