In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
from torch import nn
from torch.nn import functional as F
import torch.quantization as tq

In [3]:
import torch.nn.quantized as nnq

class ConvModel(nn.Module):
    """Single bias-free 3x3 convolution mapping 3 input channels to 5.

    Args:
        transposed: when true the layer is an ``nn.ConvTranspose2d``,
            otherwise an ``nn.Conv2d``.
    """

    def __init__(self, transposed=False):
        super().__init__()
        # Both layer types take the same constructor arguments here, so only
        # the class differs between the two variants.
        layer_cls = nn.ConvTranspose2d if transposed else nn.Conv2d
        self.conv = layer_cls(3, 5, 3, bias=False).to(torch.float)

    def forward(self, x):
        """Apply the convolution to ``x`` and return the result."""
        out = self.conv(x)
        return out
    
class AnnotatedConvModel(nn.Module):
    """Bias-free 3x3 convolution wrapped between a QuantStub and a DeQuantStub.

    Args:
        qengine: backend name handed to ``tq.get_default_qconfig``; the
            resulting qconfig is stored on the module as ``self.qconfig``.
        transposed: when true the layer is an ``nn.ConvTranspose2d``,
            otherwise an ``nn.Conv2d``.
    """

    def __init__(self, qengine, transposed=False):
        super().__init__()
        self.qconfig = tq.get_default_qconfig(qengine)
        # Submodules are registered in the order quant -> conv -> dequant,
        # which is also the order they appear in when the model is printed.
        self.quant = tq.QuantStub()
        layer_cls = nn.ConvTranspose2d if transposed else nn.Conv2d
        self.conv = layer_cls(3, 5, 3, bias=False).to(torch.float)
        self.dequant = tq.DeQuantStub()

    def forward(self, x):
        """Pass ``x`` through quant stub, convolution and dequant stub."""
        return self.dequant(self.conv(self.quant(x)))

In [4]:
# Two batches, each a one-element list holding a random float tensor of
# shape (1, 3, 10, 10).
img_data_2d = [
    [torch.rand((1, 3, 10, 10), dtype=torch.float)]
    for _ in range(2)
]
# `x` is the very tensor stored in the first batch (an alias, not a copy).
(x,) = img_data_2d[0]

In [11]:
from torch.autograd import profiler

# Select the qnnpack quantized backend globally and keep the same string in
# `qengine` so it can be passed to the model constructor.
qengine = torch.backends.quantized.engine = 'qnnpack'
# Transposed-convolution variant, switched to eval mode. The model has not
# been quantized at this point, so the profile below covers the float path.
anno_model = AnnotatedConvModel(qengine, transposed=True).eval()
with profiler.profile(record_shapes=True) as prof:
    # The "model_inference" label groups the forward pass in the profiler table.
    with profiler.record_function("model_inference"):
        y = anno_model(x)

# Per-operator averages, sorted by total CPU time.
print(prof.key_averages().table(sort_by="cpu_time_total"))

-------------------------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  
Name                             Self CPU total %  Self CPU total   CPU total %      CPU total        CPU time avg     Number of Calls  
-------------------------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  
model_inference                  35.72%           57.460us         100.00%          160.840us        160.840us        1                
aten::conv_transpose2d           3.72%            5.990us          63.32%           101.850us        101.850us        1                
aten::convolution                2.96%            4.760us          59.60%           95.860us         95.860us         1                
aten::_convolution               7.50%            12.070us         56.64%           91.100us         91.100us         1                
aten::_convolution_nogroup       2.69%         

In [13]:
def eval_fn(model, data):
    """Call ``model`` once for every entry of ``data``, discarding the outputs.

    Each entry is unpacked into positional arguments, so ``data`` must be an
    iterable of argument sequences (for example a list of one-element lists).
    Returns ``None``.
    """
    for batch_args in data:
        model(*batch_args)

# Quantize the annotated model, passing `eval_fn` as the run function and
# `img_data_2d` as its run arguments.
# NOTE(review): whether tq.quantize forwards run_args to run_fn as a single
# argument or unpacked depends on the PyTorch release — confirm against the
# installed version.
anno_model_eager = tq.quantize(anno_model, eval_fn, img_data_2d)

# Profile one forward pass of the quantized model on the first batch tensor.
with profiler.profile(record_shapes=True) as prof:
    with profiler.record_function("model_inference"):
        qy = anno_model_eager(img_data_2d[0][0])

# Per-operator averages, sorted by total CPU time.
print(prof.key_averages().table(sort_by="cpu_time_total"))

AnnotatedConvModel(
  (quant): QuantStub()
  (conv): ConvTranspose2d(3, 5, kernel_size=(3, 3), stride=(1, 1), bias=False)
  (dequant): DeQuantStub()
)
AnnotatedConvModel(
  (quant): QuantStub(
    (activation_post_process): HistogramObserver()
  )
  (conv): ConvTranspose2d(
    3, 5, kernel_size=(3, 3), stride=(1, 1), bias=False
    (activation_post_process): HistogramObserver()
  )
  (dequant): DeQuantStub()
)
AnnotatedConvModel(
  (quant): QuantStub(
    (activation_post_process): HistogramObserver()
  )
  (conv): ConvTranspose2d(
    3, 5, kernel_size=(3, 3), stride=(1, 1), bias=False
    (activation_post_process): HistogramObserver()
  )
  (dequant): DeQuantStub()
)
AnnotatedConvModel(
  (quant): Quantize(scale=tensor([0.0039]), zero_point=tensor([0]), dtype=torch.quint8)
  (conv): QuantizedConvTranpose2d(3, 5, kernel_size=(3, 3), stride=(1, 1), scale=0.004635039251297712, zero_point=118)
  (dequant): DeQuantize()
)
---------------------------------  ---------------  --------------

In [8]:
# Scratch cell: bind both names to 3 and display the pair.
b = 3
a = b
(a, b)

(3, 3)

# Old stuff

In [1]:
import torch
import onnx

In [2]:
from torch import nn
import torch.nn.quantized as nnq

class SimpleModel(torch.nn.Module):
    """Small float network annotated with quant/dequant stubs.

    The forward pass runs quant -> add(x, x) -> conv1 -> sigmoid -> conv2 ->
    dequant, flattens to rows of 72 values and applies ``fc``. ``fc`` has its
    ``qconfig`` set to ``None``.
    """

    def __init__(self):
        super().__init__()
        # Registration order fixes both the layout of ``print(model)`` and
        # the order in which parameters are initialised.
        self.quant = torch.quantization.QuantStub()
        self.dequant = torch.quantization.DeQuantStub()
        self.func_add = nnq.FloatFunctional()
        self.conv1 = nn.Conv2d(3, 2, 5, bias=None).float()
        self.act1 = nn.Sigmoid()
        self.conv2 = nn.Conv2d(2, 2, 1, bias=None).float()
        self.fc = nn.Linear(72, 10).float()
        self.fc.qconfig = None

    def forward(self, x):
        """Return the ``fc`` output for input ``x``."""
        quantized = self.quant(x)
        doubled = self.func_add.add(quantized, quantized)
        features = self.conv2(self.act1(self.conv1(doubled)))
        # Leave the stubbed region before flattening for the linear layer.
        flat = self.dequant(features).view(-1, 72).contiguous()
        return self.fc(flat)
# Instantiate the float model and print its module tree.
model = SimpleModel()
print(model)

SimpleModel(
  (quant): QuantStub()
  (dequant): DeQuantStub()
  (func_add): FloatFunctional(
    (activation_post_process): Identity()
  )
  (conv1): Conv2d(3, 2, kernel_size=(5, 5), stride=(1, 1), bias=False)
  (act1): Sigmoid()
  (conv2): Conv2d(2, 2, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (fc): Linear(in_features=72, out_features=10, bias=True)
)


In [3]:
from torchsummary import summary
import numpy as np
# Random float32 sample batch of shape (2, 3, 10, 10).
X = np.random.rand(2, 3, 10, 10).astype("float32")
# One-element tuple of inputs; later cells convert it to tensors.
X_tuple = (X,)

# summary() receives the shape without the leading batch dimension.
summary(model, X.shape[1:], device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         QuantStub-1            [-1, 3, 10, 10]               0
          Identity-2            [-1, 3, 10, 10]               0
            Conv2d-3              [-1, 2, 6, 6]             150
           Sigmoid-4              [-1, 2, 6, 6]               0
            Conv2d-5              [-1, 2, 6, 6]               4
       DeQuantStub-6              [-1, 2, 6, 6]               0
            Linear-7                   [-1, 10]             730
Total params: 884
Trainable params: 884
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.01
Params size (MB): 0.00
Estimated Total Size (MB): 0.01
----------------------------------------------------------------


In [4]:
# Select the qnnpack backend globally and attach its default qconfig to the
# top-level model.
torch.backends.quantized.engine = "qnnpack"
# Convert every numpy sample input to a tensor.
pt_inputs = tuple(torch.from_numpy(x) for x in X_tuple)
model.qconfig = torch.quantization.get_default_qconfig('qnnpack')
# NOTE(review): convert() directly follows prepare(); no data is run through
# the prepared model in this cell. The "Returning default scale and zero
# point" warning in the output is presumably a consequence — confirm.
q_model = torch.quantization.prepare(model, inplace=False)
q_model = torch.quantization.convert(q_model, inplace=False)

  Returning default scale and zero point "
  float(wt_scale), int(wt_zp), torch.qint8)


In [5]:
# Summarize the converted model using the same per-sample input shape as the
# float-model summary.
summary(q_model, X.shape[1:], device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
          Quantize-1            [-1, 3, 10, 10]               0
          Identity-2            [-1, 3, 10, 10]               0
            Conv2d-3              [-1, 2, 6, 6]               0
           Sigmoid-4              [-1, 2, 6, 6]               0
            Conv2d-5              [-1, 2, 6, 6]               0
        DeQuantize-6              [-1, 2, 6, 6]               0
            Linear-7                   [-1, 10]             730
Total params: 730
Trainable params: 730
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.01
Params size (MB): 0.00
Estimated Total Size (MB): 0.01
----------------------------------------------------------------


In [6]:
# Trace the converted model with the example inputs to get a TorchScript module.
traced_model = torch.jit.trace(q_model, pt_inputs)

In [7]:
import io

# Round-trip the traced model through an in-memory buffer.
buf = io.BytesIO()
torch.jit.save(traced_model, buf)
buf.seek(0)  # rewind so that load() reads from the start of the buffer
# NOTE: this rebinds q_model to the module returned by torch.jit.load.
q_model = torch.jit.load(buf)

In [8]:
q_model.eval()
# PyTorch-side result; the export cell passes it as `example_outputs` and
# compares it against the Caffe2 result.
output = q_model(*pt_inputs)

In [9]:
import caffe2.python.onnx.backend as c2

# Name given to the exported graph input; also the key used to feed Caffe2.
input_names=["x"]
# True selects the tolerant comparison (max abs diff <= 1) below.
relaxed_check=True

# Export the loaded TorchScript model to ONNX in memory, then read it back.
f = io.BytesIO()
torch.onnx.export(q_model, pt_inputs, f, input_names=input_names, example_outputs=output,
                  operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK)
f.seek(0)  # rewind so that onnx.load() reads from the start of the buffer
onnx_model = onnx.load(f)
# Run the ONNX model through the Caffe2 backend on the numpy inputs and take
# the first output.
caffe_res = c2.run_model(onnx_model, dict(zip(input_names, X_tuple)))[0]
# Due to a change in the requantization logic for certain ops such as conv and
# linear in PyTorch's integration of qnnpack, numerics may mismatch with C2.
# This mismatch should not be off by more than 1.
# This flag helps us override default behavior under certain circumstances.
if relaxed_check:
    output_diff = np.absolute(np.squeeze(output.detach().numpy()) - caffe_res)
    max_diff = np.amax(output_diff)

    # This check had to be changed to account for changes in
    # qnnpack's requant logic.
    np.testing.assert_(max_diff <= 1, "Maximum absolute difference must be less than 1")
else:
    # Strict comparison to 3 decimal places.
    np.testing.assert_almost_equal(output.detach().numpy(), caffe_res, decimal=3)

  "`{}` argument will be ignored.".format(arg_name, arg_name))
  "`{}` argument will be ignored.".format(arg_name, arg_name))


In [10]:
def generic_test(model, sample_inputs, input_names=None, decimal=3, relaxed_check=False):
    """Quantize ``model``, export it to ONNX and compare PyTorch vs Caffe2 output.

    Steps: select the qnnpack backend, prepare/convert the model, trace it,
    round-trip it through TorchScript serialization, export the loaded module
    to ONNX and run the ONNX graph with the Caffe2 backend.

    Args:
        model: float ``nn.Module``; its ``qconfig`` attribute is overwritten
            with the default qnnpack qconfig.
        sample_inputs: sequence of numpy arrays used as example inputs.
        input_names: names for the ONNX graph inputs, one per sample input.
            When ``None``, names ``input_0``, ``input_1``, ... are generated.
        decimal: precision for the strict comparison (``relaxed_check=False``).
        relaxed_check: when true, only require the maximum absolute
            difference between the two outputs to be at most 1.

    Raises:
        AssertionError: if the PyTorch and Caffe2 outputs disagree.

    Side effects: sets ``torch.backends.quantized.engine`` to ``"qnnpack"``
    and prints the traced graph.
    """
    # The names are needed both for the export and as keys of the Caffe2 feed
    # dict; with the previous ``None`` default the ``zip`` below raised
    # TypeError, so generate a name per input instead.
    if input_names is None:
        input_names = ["input_{}".format(i) for i in range(len(sample_inputs))]

    torch.backends.quantized.engine = "qnnpack"
    pt_inputs = tuple(torch.from_numpy(x) for x in sample_inputs)
    model.qconfig = torch.quantization.get_default_qconfig('qnnpack')
    q_model = torch.quantization.prepare(model, inplace=False)
    q_model = torch.quantization.convert(q_model, inplace=False)

    traced_model = torch.jit.trace(q_model, pt_inputs)
    print(traced_model.graph)
    # Round-trip through an in-memory buffer so the exported model is the
    # deserialized TorchScript module.
    buf = io.BytesIO()
    torch.jit.save(traced_model, buf)
    buf.seek(0)
    q_model = torch.jit.load(buf)

    q_model.eval()
    output = q_model(*pt_inputs)

    f = io.BytesIO()
    torch.onnx.export(q_model, pt_inputs, f, input_names=input_names, example_outputs=output,
                      operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK)
    f.seek(0)
    onnx_model = onnx.load(f)
    caffe_res = c2.run_model(onnx_model, dict(zip(input_names, sample_inputs)))[0]
    # Due to a change in the requantization logic for certain ops such as conv
    # and linear in PyTorch's integration of qnnpack, numerics may mismatch
    # with C2. This mismatch should not be off by more than 1.
    # This flag helps us override default behavior under certain circumstances.
    if relaxed_check:
        output_diff = np.absolute(np.squeeze(output.detach().numpy()) - caffe_res)
        max_diff = np.amax(output_diff)

        # This check had to be changed to account for changes in
        # qnnpack's requant logic.
        np.testing.assert_(max_diff <= 1, "Maximum absolute difference must be less than 1")
    else:
        np.testing.assert_almost_equal(output.detach().numpy(), caffe_res, decimal=decimal)


In [11]:
class SimpleModel(torch.nn.Module):
    """Stub-annotated float network: add -> conv1 -> sigmoid -> conv2 -> fc.

    ``quant``/``dequant`` bracket everything up to ``conv2``; the linear
    layer runs after ``dequant`` and has ``qconfig`` set to ``None``.
    """

    def __init__(self):
        super().__init__()
        # Keep this creation order: it determines the printed module tree
        # and the parameter initialisation sequence.
        self.quant = torch.quantization.QuantStub()
        self.dequant = torch.quantization.DeQuantStub()
        self.func_add = nnq.FloatFunctional()
        self.conv1 = nn.Conv2d(3, 2, 5, bias=None).float()
        self.act1 = nn.Sigmoid()
        self.conv2 = nn.Conv2d(2, 2, 1, bias=None).float()
        self.fc = nn.Linear(72, 10).float()
        self.fc.qconfig = None

    def forward(self, x):
        """Run the network on ``x`` and return the linear layer's output."""
        stubbed = self.quant(x)
        summed = self.func_add.add(stubbed, stubbed)
        activated = self.act1(self.conv1(summed))
        dequantized = self.dequant(self.conv2(activated))
        # Flatten to rows of 72 values, the input width of ``fc``.
        rows = dequantized.view(-1, 72).contiguous()
        return self.fc(rows)

# Random float32 input of shape (2, 3, 10, 10) for the end-to-end check.
x = np.random.rand(2, 3, 10, 10).astype("float32")
# relaxed_check=True selects the tolerant comparison (max abs diff <= 1).
generic_test(SimpleModel(), (x,), input_names=["x"], relaxed_check=True)

graph(%self.1 : __torch__.___torch_mangle_14.SimpleModel,
      %X : Float(2:300, 3:100, 10:10, 10:1)):
  %116 : __torch__.torch.nn.modules.linear.___torch_mangle_13.Linear = prim::GetAttr[name="fc"](%self.1)
  %113 : __torch__.torch.nn.quantized.modules.DeQuantize = prim::GetAttr[name="dequant"](%self.1)
  %112 : __torch__.torch.nn.quantized.modules.conv.___torch_mangle_12.Conv2d = prim::GetAttr[name="conv2"](%self.1)
  %110 : __torch__.torch.nn.modules.activation.___torch_mangle_11.Sigmoid = prim::GetAttr[name="act1"](%self.1)
  %109 : __torch__.torch.nn.quantized.modules.conv.Conv2d = prim::GetAttr[name="conv1"](%self.1)
  %106 : __torch__.torch.nn.quantized.modules.functional_modules.QFunctional = prim::GetAttr[name="func_add"](%self.1)
  %107 : __torch__.torch.nn.modules.linear.___torch_mangle_10.Identity = prim::GetAttr[name="activation_post_process"](%106)
  %105 : __torch__.torch.nn.quantized.modules.Quantize = prim::GetAttr[name="quant"](%self.1)
  %125 : Tensor = prim::CallMe

RuntimeError: "tensor_cpu" not implemented for 'Bool'

In [3]:
import torch
from torch import nn
import torch.nn.quantized as nnq
import numpy as np
import io
import onnx
import caffe2.python.onnx.backend as c2

class SimpleModel(torch.nn.Module):
    """Float model with quantization stubs around its convolutional part.

    Data flows quant -> func_add.add(x, x) -> conv1 -> act1 -> conv2 ->
    dequant -> view(-1, 72) -> fc. ``fc.qconfig`` is set to ``None``.
    """

    def __init__(self):
        super().__init__()
        # Creation order is significant: it fixes the printed layout and the
        # order in which random initial weights are drawn.
        self.quant = torch.quantization.QuantStub()
        self.dequant = torch.quantization.DeQuantStub()
        self.func_add = nnq.FloatFunctional()
        self.conv1 = nn.Conv2d(3, 2, 5, bias=None).float()
        self.act1 = nn.Sigmoid()
        self.conv2 = nn.Conv2d(2, 2, 1, bias=None).float()
        self.fc = nn.Linear(72, 10).float()
        self.fc.qconfig = None

    def forward(self, x):
        """Return the output of ``fc`` for the input ``x``."""
        out = self.quant(x)
        out = self.func_add.add(out, out)
        # Convolutional trunk inside the stubbed region.
        for layer in (self.conv1, self.act1, self.conv2, self.dequant):
            out = layer(out)
        return self.fc(out.view(-1, 72).contiguous())

# Random float32 input of shape (2, 3, 10, 10).
x = np.random.rand(2, 3, 10, 10).astype("float32")
# self.generic_test(SimpleModel(), (x,), input_names=["x"], relaxed_check=True)

# Arguments of the call above, unrolled into module-level names so that the
# steps below can be run and inspected one by one.
model = SimpleModel()
sample_inputs = (x,)
input_names = ['x']
relaxed_check = True
# Precision for the strict comparison branch. It was previously undefined in
# this cell, so switching relaxed_check to False raised NameError.
decimal = 3

# Select the qnnpack backend, then prepare and convert the model.
torch.backends.quantized.engine = "qnnpack"
pt_inputs = tuple(torch.from_numpy(x) for x in sample_inputs)
model.qconfig = torch.quantization.get_default_qconfig('qnnpack')
q_model = torch.quantization.prepare(model, inplace=False)
q_model = torch.quantization.convert(q_model, inplace=False)

# Trace, then round-trip through an in-memory TorchScript buffer.
traced_model = torch.jit.trace(q_model, pt_inputs)
print(traced_model.graph)
buf = io.BytesIO()
torch.jit.save(traced_model, buf)
buf.seek(0)
q_model = torch.jit.load(buf)

q_model.eval()
output = q_model(*pt_inputs)

# Export to ONNX in memory and run the result with the Caffe2 backend.
f = io.BytesIO()
torch.onnx.export(q_model, pt_inputs, f, input_names=input_names, example_outputs=output,
                  operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK)
f.seek(0)
onnx_model = onnx.load(f)
caffe_res = c2.run_model(onnx_model, dict(zip(input_names, sample_inputs)))[0]
# Due to a change in the requantization logic for certain ops such as conv and
# linear in PyTorch's integration of qnnpack, numerics may mismatch with C2.
# This mismatch should not be off by more than 1.
# This flag helps us override default behavior under certain circumstances.
if relaxed_check:
    output_diff = np.absolute(np.squeeze(output.detach().numpy()) - caffe_res)
    max_diff = np.amax(output_diff)

    # This check had to be changed to account for changes in
    # qnnpack's requant logic.
    np.testing.assert_(max_diff <= 1, "Maximum absolute difference must be less than 1")
else:
    np.testing.assert_almost_equal(output.detach().numpy(), caffe_res, decimal=decimal)


graph(%self.1 : __torch__.___torch_mangle_18.SimpleModel,
      %X : Float(2:300, 3:100, 10:10, 10:1)):
  %116 : __torch__.torch.nn.modules.linear.___torch_mangle_17.Linear = prim::GetAttr[name="fc"](%self.1)
  %113 : __torch__.torch.nn.quantized.modules.___torch_mangle_11.DeQuantize = prim::GetAttr[name="dequant"](%self.1)
  %112 : __torch__.torch.nn.quantized.modules.conv.___torch_mangle_16.Conv2d = prim::GetAttr[name="conv2"](%self.1)
  %110 : __torch__.torch.nn.modules.activation.___torch_mangle_15.Sigmoid = prim::GetAttr[name="act1"](%self.1)
  %109 : __torch__.torch.nn.quantized.modules.conv.___torch_mangle_14.Conv2d = prim::GetAttr[name="conv1"](%self.1)
  %106 : __torch__.torch.nn.quantized.modules.functional_modules.___torch_mangle_13.QFunctional = prim::GetAttr[name="func_add"](%self.1)
  %107 : __torch__.torch.nn.modules.linear.___torch_mangle_12.Identity = prim::GetAttr[name="activation_post_process"](%106)
  %105 : __torch__.torch.nn.quantized.modules.___torch_mangle_10.Qu

  Returning default scale and zero point "
  "`{}` argument will be ignored.".format(arg_name, arg_name))
  "`{}` argument will be ignored.".format(arg_name, arg_name))


RuntimeError: "tensor_cpu" not implemented for 'Bool'