In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim



In [2]:
# Per-channel mean / std handed to transforms.Normalize below.
NORM_MEAN = (0.4914, 0.4822, 0.4465)
NORM_STD = (0.2471, 0.2435, 0.2616)

# Preprocessing pipeline: resize to 32x32, convert to a tensor, normalize.
transform = transforms.Compose(
    [
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(NORM_MEAN, NORM_STD),
    ]
)

# Training split (shuffled), downloaded into ./data when missing.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform
)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=128, shuffle=True, num_workers=2
)

# Test split, iterated in a fixed order (shuffle=False).
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform
)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=128, shuffle=False, num_workers=2
)


Files already downloaded and verified
Files already downloaded and verified


In [33]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# ResNet-18 from the local `cifar10_models` package, built with
# pretrained=True. The model is NOT moved to `device` in this cell.
from cifar10_models.resnet import resnet18
model = resnet18(pretrained=True)


In [8]:
import time
import numpy as np

# Evaluate the original (not yet folded / quantized) model on the test set,
# recording top-1 accuracy and the mean forward-pass time per batch.
model.eval()
model = model.to(device)

# torch.cuda.synchronize() is only valid when CUDA is in use; `device` may be
# the CPU fallback, in which case the call would raise.
on_cuda = device.type == "cuda"

correct = 0
total = 0
results = []  # forward-pass wall time in seconds, one entry per batch
with torch.no_grad():
    for inputs, labels in testloader:
        inputs, labels = inputs.to(device), labels.to(device)
        if on_cuda:
            # Drain pending GPU work (including the copies above) so that it
            # is not billed to the forward pass.
            torch.cuda.synchronize()
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start = time.perf_counter()
        outputs = model(inputs)
        if on_cuda:
            # Kernels launch asynchronously; wait for them before stopping
            # the clock.
            torch.cuda.synchronize()
        end = time.perf_counter()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        results.append(end - start)

# Note: the mean includes the first (warm-up) batch.
infer_time = np.mean(results)
print(infer_time)
print(f"Accuracy on test set: {100 * correct / total:.2f}%")

0.006441243087189107
Accuracy on test set: 93.07%


In [9]:
from aimet_torch.v2.batch_norm_fold import fold_all_batch_norms

# Run AIMET's batch-norm folding on `model`, describing the input as a single
# 3x32x32 sample. The return value is discarded.
# NOTE(review): presumably this modifies `model` in place, since later cells
# keep using `model` directly -- confirm against the AIMET documentation.
_ = fold_all_batch_norms(model, input_shapes=(1, 3, 32, 32))

2024-12-22 11:15:56,077 - root - INFO - AIMET


In [10]:
from aimet_common.defs import QuantScheme
from aimet_torch.v1.quantsim import QuantizationSimModel

# Single random sample with shape (batch=1, channels=3, height=32, width=32),
# matching the 32x32 images produced by the data transform.
dummy_input = torch.rand(1, 3, 32, 32)

# Place the dummy input on whatever device the model currently lives on
# instead of hard-coding .cuda(), which fails on a CPU-only machine.
dummy_input = dummy_input.to(next(model.parameters()).device)

# Quantization simulation wrapper around `model`:
# 8-bit outputs (default_output_bw) and 4-bit parameters (default_param_bw).
sim = QuantizationSimModel(model=model,
                           quant_scheme=QuantScheme.post_training_tf_enhanced,
                           dummy_input=dummy_input,
                           default_output_bw=8,
                           default_param_bw=4)

2024-12-22 11:16:00,908 - Quant - INFO - No config file provided, defaulting to config file at /usr/local/lib/python3.10/dist-packages/aimet_common/quantsim_config/default_config.json
2024-12-22 11:16:00,928 - Quant - INFO - Unsupported op type Squeeze
2024-12-22 11:16:00,928 - Quant - INFO - Unsupported op type Mean
2024-12-22 11:16:00,929 - Quant - INFO - Selecting DefaultOpInstanceConfigGenerator to compute the specialized config. hw_version:default


In [11]:
# Follow actual CUDA availability instead of assuming a GPU, mirroring the
# CPU fallback used when `device` is chosen.
use_cuda = torch.cuda.is_available()


In [12]:
import time
import numpy as np

# Evaluate sim.model on the test set before compute_encodings has been
# called, recording top-1 accuracy and mean forward-pass time per batch.

# torch.cuda.synchronize() is only valid when CUDA is in use; `device` may be
# the CPU fallback, in which case the call would raise.
on_cuda = device.type == "cuda"

correct = 0
total = 0
results = []  # forward-pass wall time in seconds, one entry per batch
with torch.no_grad():
    for inputs, labels in testloader:
        inputs, labels = inputs.to(device), labels.to(device)
        if on_cuda:
            # Drain pending GPU work (including the copies above) so that it
            # is not billed to the forward pass.
            torch.cuda.synchronize()
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start = time.perf_counter()
        outputs = sim.model(inputs)
        if on_cuda:
            # Kernels launch asynchronously; wait for them before stopping
            # the clock.
            torch.cuda.synchronize()
        end = time.perf_counter()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        results.append(end - start)

# Note: the mean includes the first (warm-up) batch.
infer_time = np.mean(results)
print(infer_time)
print(f"Accuracy on test set: {100 * correct / total:.2f}%")

0.26224361793904366
Accuracy on test set: 90.60%


In [13]:
def pass_calibration_data(sim_model, use_cuda):
    """Feed batches from the global ``testloader`` through ``sim_model``.

    Intended as the ``forward_pass_callback`` for ``compute_encodings``.
    Outputs are discarded; only the forward passes matter.

    Args:
        sim_model: Callable model exposing ``eval()``.
        use_cuda: When truthy, inputs are moved to ``'cuda'``; otherwise
            to ``'cpu'``.

    Batches are consumed until the running count ``batches * batch_size``
    exceeds 1000 samples, or the loader is exhausted.
    """
    target_device = torch.device('cuda' if use_cuda else 'cpu')

    loader = testloader
    per_batch = loader.batch_size
    sample_budget = 1000

    sim_model.eval()
    with torch.no_grad():
        # Labels are not needed for calibration, so they are ignored.
        for seen_batches, (images, _) in enumerate(loader, start=1):
            sim_model(images.to(target_device))
            if seen_batches * per_batch > sample_budget:
                break

In [14]:
# Compute quantizer encodings; `pass_calibration_data` is invoked as the
# forward-pass callback and receives `use_cuda` as its argument.
sim.compute_encodings(
    forward_pass_callback=pass_calibration_data,
    forward_pass_callback_args=use_cuda,
)

In [15]:
import time
import numpy as np

# Evaluate sim.model on the test set after compute_encodings, recording
# top-1 accuracy and mean forward-pass time per batch.

# torch.cuda.synchronize() is only valid when CUDA is in use; `device` may be
# the CPU fallback, in which case the call would raise.
on_cuda = device.type == "cuda"

correct = 0
total = 0
results = []  # forward-pass wall time in seconds, one entry per batch
with torch.no_grad():
    for inputs, labels in testloader:
        inputs, labels = inputs.to(device), labels.to(device)
        if on_cuda:
            # Drain pending GPU work (including the copies above) so that it
            # is not billed to the forward pass.
            torch.cuda.synchronize()
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start = time.perf_counter()
        outputs = sim.model(inputs)
        if on_cuda:
            # Kernels launch asynchronously; wait for them before stopping
            # the clock.
            torch.cuda.synchronize()
        end = time.perf_counter()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        results.append(end - start)

# Note: the mean includes the first (warm-up) batch.
infer_time = np.mean(results)
print(infer_time)
print(f"Accuracy on test set: {100 * correct / total:.2f}%")

0.006863796258274513
Accuracy on test set: 90.39%


In [16]:
from aimet_torch.v1.adaround.adaround_weight import Adaround, AdaroundParameters
import os

# Make sure the directory that receives the AdaRound artifacts exists.
os.makedirs('./output/', exist_ok=True)

# AdaRound settings: draw from the test loader, using 1 batch and
# 64 iterations as the default iteration count.
data_loader = testloader
params = AdaroundParameters(
    data_loader=data_loader,
    num_batches=1,
    default_num_iterations=64,
)

# Single random 3x32x32 sample, moved to the GPU only when use_cuda is set.
dummy_input = torch.rand(1, 3, 32, 32)
dummy_input = dummy_input.cuda() if use_cuda else dummy_input

# Apply AdaRound to `model` with 4-bit parameters. The path / prefix pair
# matches the "output/adaround.encodings" file loaded later in the notebook.
ada_model = Adaround.apply_adaround(
    model,
    dummy_input,
    params,
    path="output",
    filename_prefix='adaround',
    default_param_bw=4,
    default_quant_scheme=QuantScheme.post_training_tf_enhanced,
)

2024-12-22 11:16:31,032 - Quant - INFO - No config file provided, defaulting to config file at /usr/local/lib/python3.10/dist-packages/aimet_common/quantsim_config/default_config.json
2024-12-22 11:16:31,052 - Quant - INFO - Unsupported op type Squeeze
2024-12-22 11:16:31,053 - Quant - INFO - Unsupported op type Mean
2024-12-22 11:16:31,054 - Quant - INFO - Selecting DefaultOpInstanceConfigGenerator to compute the specialized config. hw_version:default


2024-12-22 11:16:31,400 - Utils - INFO - Caching 1 batches from data loader at path location: /tmp/tmpeeelp1rg


                                      

2024-12-22 11:16:31,442 - Quant - INFO - Started Optimizing weight rounding of module: conv1


                                              

2024-12-22 11:16:32,041 - Quant - INFO - Started Optimizing weight rounding of module: layer1.0.conv1


                                              

2024-12-22 11:16:32,224 - Quant - INFO - Started Optimizing weight rounding of module: layer1.0.conv2


                                              

2024-12-22 11:16:32,388 - Quant - INFO - Started Optimizing weight rounding of module: layer1.1.conv1


                                               

2024-12-22 11:16:32,558 - Quant - INFO - Started Optimizing weight rounding of module: layer1.1.conv2


                                               

2024-12-22 11:16:32,727 - Quant - INFO - Started Optimizing weight rounding of module: layer2.0.conv1


                                               

2024-12-22 11:16:32,904 - Quant - INFO - Started Optimizing weight rounding of module: layer2.0.conv2


                                               

2024-12-22 11:16:33,049 - Quant - INFO - Started Optimizing weight rounding of module: layer2.0.downsample.0


                                               

2024-12-22 11:16:33,186 - Quant - INFO - Started Optimizing weight rounding of module: layer2.1.conv1


                                               

2024-12-22 11:16:33,321 - Quant - INFO - Started Optimizing weight rounding of module: layer2.1.conv2


                                               

2024-12-22 11:16:33,455 - Quant - INFO - Started Optimizing weight rounding of module: layer3.0.conv1


                                               

2024-12-22 11:16:33,594 - Quant - INFO - Started Optimizing weight rounding of module: layer3.0.conv2


                                               

2024-12-22 11:16:33,733 - Quant - INFO - Started Optimizing weight rounding of module: layer3.0.downsample.0


                                               

2024-12-22 11:16:33,867 - Quant - INFO - Started Optimizing weight rounding of module: layer3.1.conv1


                                               

2024-12-22 11:16:34,008 - Quant - INFO - Started Optimizing weight rounding of module: layer3.1.conv2


                                               

2024-12-22 11:16:34,143 - Quant - INFO - Started Optimizing weight rounding of module: layer4.0.conv1


                                               

2024-12-22 11:16:34,290 - Quant - INFO - Started Optimizing weight rounding of module: layer4.0.conv2


                                               

2024-12-22 11:16:34,465 - Quant - INFO - Started Optimizing weight rounding of module: layer4.0.downsample.0


                                               

2024-12-22 11:16:34,620 - Quant - INFO - Started Optimizing weight rounding of module: layer4.1.conv1


                                               

2024-12-22 11:16:34,800 - Quant - INFO - Started Optimizing weight rounding of module: layer4.1.conv2


                                               

2024-12-22 11:16:34,980 - Quant - INFO - Started Optimizing weight rounding of module: fc


100%|██████████| 60/60 [00:03<00:00, 16.15it/s]

2024-12-22 11:16:35,125 - Quant - INFO - Completed Adarounding Model





In [17]:
# Rebuild the quantization simulation around the AdaRounded model with the
# same scheme and bit-widths (8-bit outputs, 4-bit parameters).
sim = QuantizationSimModel(
    model=ada_model,
    dummy_input=dummy_input,
    default_param_bw=4,
    default_output_bw=8,
    quant_scheme=QuantScheme.post_training_tf_enhanced,
)

# Load the parameter encodings from the AdaRound output file and freeze them
# before the remaining encodings are computed.
sim.set_and_freeze_param_encodings(
    encoding_path=os.path.join("output", 'adaround.encodings')
)

# Compute encodings using the calibration callback, which receives `use_cuda`.
sim.compute_encodings(
    forward_pass_callback=pass_calibration_data,
    forward_pass_callback_args=use_cuda,
)

2024-12-22 11:16:35,224 - Quant - INFO - No config file provided, defaulting to config file at /usr/local/lib/python3.10/dist-packages/aimet_common/quantsim_config/default_config.json
2024-12-22 11:16:35,243 - Quant - INFO - Unsupported op type Squeeze
2024-12-22 11:16:35,244 - Quant - INFO - Unsupported op type Mean
2024-12-22 11:16:35,245 - Quant - INFO - Selecting DefaultOpInstanceConfigGenerator to compute the specialized config. hw_version:default


  sim.set_and_freeze_param_encodings(encoding_path=os.path.join("output", 'adaround.encodings'))


In [18]:
import time
import numpy as np

# Evaluate the AdaRound-based sim.model on the test set, recording top-1
# accuracy and mean forward-pass time per batch.

# torch.cuda.synchronize() is only valid when CUDA is in use; `device` may be
# the CPU fallback, in which case the call would raise.
on_cuda = device.type == "cuda"

correct = 0
total = 0
results = []  # forward-pass wall time in seconds, one entry per batch
with torch.no_grad():
    for inputs, labels in testloader:
        inputs, labels = inputs.to(device), labels.to(device)
        if on_cuda:
            # Drain pending GPU work (including the copies above) so that it
            # is not billed to the forward pass.
            torch.cuda.synchronize()
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start = time.perf_counter()
        outputs = sim.model(inputs)
        if on_cuda:
            # Kernels launch asynchronously; wait for them before stopping
            # the clock.
            torch.cuda.synchronize()
        end = time.perf_counter()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        results.append(end - start)

# Note: the mean includes the first (warm-up) batch.
infer_time = np.mean(results)
print(infer_time)
print(f"Accuracy on test set: {100 * correct / total:.2f}%")

0.0070584333395656155
Accuracy on test set: 91.46%
