<a href="https://colab.research.google.com/github/ychsiao0809/QuantizationForYolov5/blob/main/Quantization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Quantization
$x=s(x_q-z)$

$x_q=round(\frac{1}{s}x+z)$

假設原始變數 $x$ 的範圍為 $(\alpha, \beta)$

經過量化後的變數 $x_q$ 範圍為 $(\alpha_q, \beta_q)$

則 $\displaystyle s = \frac{\beta-\alpha}{\beta_q-\alpha_q}$

$\displaystyle z = round\left(\frac{\beta\alpha_q-\alpha\beta_q}{\beta-\alpha}\right)$

In [None]:
!git clone https://github.com/ultralytics/yolov5  # clone repo
%cd yolov5
%pip install -qr requirements.txt  # install dependencies


Cloning into 'yolov5'...
remote: Enumerating objects: 7160, done.[K
remote: Counting objects: 100% (266/266), done.[K
remote: Compressing objects: 100% (162/162), done.[K
remote: Total 7160 (delta 157), reused 182 (delta 104), pack-reused 6894[K
Receiving objects: 100% (7160/7160), 9.21 MiB | 27.49 MiB/s, done.
Resolving deltas: 100% (4895/4895), done.
/content/yolov5/yolov5
[31mERROR: Operation cancelled by user[0m


In [None]:
import os

import torch
import yolov5
# from yolov5.models.experimental import attempt_load


In [None]:
class MySiLU(torch.nn.Module):
  """SiLU activation bracketed by a DeQuantStub (before) and a QuantStub (after).

  The three children are registered as ``model`` (the SiLU), ``dequant`` and
  ``quant``.
  """

  def __init__(self):
    super().__init__()
    self.model = torch.nn.SiLU()
    self.dequant = torch.quantization.DeQuantStub()
    self.quant = torch.quantization.QuantStub()

  def forward(self, x):
    """Return ``quant(silu(dequant(x)))``."""
    activated = self.model(self.dequant(x))
    return self.quant(activated)

In [None]:
# Reload the already-imported `yolov5` module object in the running kernel.
import importlib
importlib.reload(yolov5)

<module 'yolov5' (namespace)>

In [None]:

class QuantModel(torch.nn.Module):
  """Wrap a model loaded by ``attempt_load`` between a QuantStub and a DeQuantStub.

  While constructing, every submodule of the loaded model whose ``act``
  attribute is a ``torch.nn.SiLU`` has that attribute replaced by a fresh
  ``MySiLU`` instance.

  Args:
    weights: checkpoint location handed to ``attempt_load``.
    device: forwarded to ``attempt_load`` as ``map_location``.
  """

  def __init__(self, weights, device):
    super().__init__()
    self.quant = torch.quantization.QuantStub()
    # NOTE(review): the notebook output shows `yolov5` imported as a namespace
    # package; confirm `yolov5.models.experimental` is actually loaded here.
    self.model = yolov5.models.experimental.attempt_load(weights, map_location=device)
    for submodule in self.model.modules():
      # print(submodule)
      if isinstance(getattr(submodule, 'act', None), torch.nn.SiLU):
        submodule.act = MySiLU()
    self.dequant = torch.quantization.DeQuantStub()

  def forward(self, x):
    """Pass ``x`` through quant -> wrapped model -> dequant and return the result."""
    return self.dequant(self.model(self.quant(x)))
    

In [None]:
def save_torchscript_model(model, model_dir, model_filename):
    """Script ``model`` with TorchScript and save it to ``model_dir/model_filename``.

    Args:
        model: module passed to ``torch.jit.script``.
        model_dir: destination directory; created (including parents) when
            missing. An empty string means the current working directory.
        model_filename: name of the saved file inside ``model_dir``.
    """
    # exist_ok=True removes the check-then-create race of the old
    # `os.path.exists` guard. os.makedirs('') raises, so skip it for an
    # empty directory name.
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)
    model_filepath = os.path.join(model_dir, model_filename)
    torch.jit.save(torch.jit.script(model), model_filepath)

In [None]:
weights = '/best.pt'
device = 'cpu'

# Destination of the TorchScript export written at the end of this cell.
model_dir = 'saved_models'
model_filename = 'yolov5_int8_jit.pt'

# Build the stub-wrapped FP32 model and put it in eval mode.
model_fp32 = QuantModel(weights, device)

model_fp32.eval()

# Attach the default qconfig for the 'fbgemm' backend.
model_fp32.qconfig = torch.quantization.get_default_qconfig('fbgemm')

# model_fp32_fused = torch.quantization.fuse_modules(model_fp32, [['conv', 'relu']])

model_fp32_prepared = torch.quantization.prepare(model_fp32)

# Calibration: a single forward pass on random input through the prepared model.
input_fp32 = torch.randn(1, 3, 384, 640)
model_fp32_prepared(input_fp32)

model_int8 = torch.quantization.convert(model_fp32_prepared)

# NOTE(review): the recorded output of this cell ends in a RuntimeError whose
# origin is not visible here; confirm the converted model runs end to end.
res = model_int8(input_fp32)

# save_torchscript_model requires (model, model_dir, model_filename); the
# previous one-argument call raised TypeError before anything was saved.
save_torchscript_model(model_int8, model_dir, model_filename)

  reduce_range will be deprecated in a future release of PyTorch."


RuntimeError: ignored

In [None]:
def load_torchscript_model(model_filepath, device):
    """Load a TorchScript archive from ``model_filepath``.

    Args:
        model_filepath: path handed to ``torch.jit.load``.
        device: forwarded to ``torch.jit.load`` as ``map_location``.

    Returns:
        The object returned by ``torch.jit.load``.
    """
    return torch.jit.load(model_filepath, map_location=device)