Enabled per channel quantized static linear/conv
Summary:
Enable channelwise quantized tests for qlinear and qconv.
Dynamic linear to follow.

Test Plan:
pytest test/quantization/test_quantized.py
pytest test/quantization/test_quantized_module.py
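For context, channelwise (per-channel) weight quantization differs from per-tensor quantization only in carrying one scale and zero point per output channel. A minimal sketch with made-up shapes and scales (standard torch API, not part of this commit):

import torch

w = torch.randn(8, 4)  # hypothetical linear weight: out_features x in_features

# Per-tensor: a single scale and zero point for the whole tensor.
w_qt = torch.quantize_per_tensor(w, 0.1, 0, torch.qint8)

# Per-channel ("channelwise"): one scale and zero point per output channel (axis 0).
scales = torch.rand(8) * 0.1 + 0.01
zero_points = torch.zeros(8, dtype=torch.long)
w_qc = torch.quantize_per_channel(w, scales, zero_points, 0, torch.qint8)

print(w_qt.qscheme())  # torch.per_tensor_affine
print(w_qc.qscheme())  # torch.per_channel_affine
print(w_qc.q_per_channel_scales())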

Reviewers:

Subscribers:

Tasks:

Tags:

ghstack-source-id: 3ad2892fb90e634d48f56d860a942e98b0c70639
Pull Request resolved: pytorch/pytorch#37622
kimishpatel committed May 7, 2020
1 parent 7c73070 commit b34e8cd
Showing 7 changed files with 24 additions and 15 deletions.
15 changes: 13 additions & 2 deletions aten/src/ATen/native/quantized/cpu/qconv.cpp
@@ -536,9 +536,20 @@ at::Tensor PackedConvWeightsQnnp<kSpatialDim>::apply_impl(
     for (int i = 0; i < wt_numel; ++i) {
       qnnp_w_data[i] = static_cast<c10::quint8>(w_data[i] + 128);
     }
+    at::Tensor bias;
     // Original bias was float, so we requantize it here.
-    auto bias = at::quantize_per_tensor(
-        bias_fp32, weight_scales_data[0] * act_input_scale, 0, c10::kQInt32);
+    if (is_per_channel) {
+      at::Tensor bias_quant_scales = weight_contig.q_per_channel_scales() * act_input_scale;
+      at::Tensor bias_zp = at::zeros(bias_quant_scales.sizes(), c10::kInt);
+      bias = at::native::quantize_per_channel_cpu(
+          bias_fp32, bias_quant_scales, bias_zp, 0, c10::kQInt32);
+    } else {
+      bias = at::native::quantize_per_tensor(
+          bias_fp32,
+          weight_contig.q_scale() * act_input_scale,
+          0,
+          c10::kQInt32);
+    }

     conv_p = qnnpack::conv_param_t(
         {kernel_w, kernel_h},
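A rough Python analogue of the bias handling added above, using the public torch API instead of the internal at::native calls; the scales and values are assumed for illustration. For per-channel weights, each bias element is requantized to int32 with scale = weight_scale[c] * act_input_scale and zero point 0:

import torch

act_input_scale = 0.05                     # assumed activation (input) scale
w_scales = torch.tensor([0.1, 0.2, 0.05])  # assumed per-channel weight scales
bias_fp32 = torch.tensor([0.3, -0.7, 1.2])

# Bias scale per channel = weight scale per channel * activation scale, zero point 0.
bias_scales = w_scales * act_input_scale
bias_zps = torch.zeros(3, dtype=torch.long)
bias_q = torch.quantize_per_channel(bias_fp32, bias_scales, bias_zps, 0, torch.qint32)
print(bias_q.int_repr())  # the int32 bias values a backend such as QNNPACK consumes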
4 changes: 0 additions & 4 deletions aten/src/ATen/native/quantized/cpu/qconv_prepack.cpp
@@ -168,10 +168,6 @@ c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> PackedConvWeightsQnnp<kSpa
       weight.ndimension() == 4,
       "quantized::conv2d_prepack (qnnpack): Weights are expected to have 4 "
       "dimensions");
-  TORCH_CHECK(
-      weight.qscheme() == c10::kPerTensorAffine,
-      "quantized::conv2d_prepack (qnnpack): only supports Per Tensor "
-      "Quantization Scheme")
   TORCH_CHECK(
       stride.size() == 2,
       "quantized::conv2d_prepack (qnnpack): 2D convolution only");
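With the per-tensor-only check removed, conv prepack under QNNPACK can accept a per-channel quantized weight. A hedged sketch (assumed shapes and scales, guarded on engine availability):

import torch

if 'qnnpack' in torch.backends.quantized.supported_engines:
    torch.backends.quantized.engine = 'qnnpack'
    oc, ic, kh, kw = 4, 3, 3, 3
    w = torch.randn(oc, ic, kh, kw)
    w_q = torch.quantize_per_channel(
        w, torch.rand(oc) * 0.1 + 0.01, torch.zeros(oc, dtype=torch.long),
        0, torch.qint8)
    bias = torch.randn(oc)
    # weight, bias, stride, padding, dilation, groups
    packed = torch.ops.quantized.conv2d_prepack(
        w_q, bias, [1, 1], [0, 0], [1, 1], 1)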
4 changes: 4 additions & 0 deletions aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp
@@ -223,6 +223,10 @@ class QLinearDynamicInt8 final {

     auto& pack_ptr =
         cpp_custom_type_hack::cast<PackedLinearWeightsQnnp>(packed_weight);
+    TORCH_CHECK(
+        pack_ptr.orig_weight.qscheme() == kPerTensorAffine,
+        "quantized::linear_dynamic (qnnpack) only supports "
+        "Per Tensor Quantization Scheme");
     auto packB = pack_ptr.w.get();
     // Adjust weight zero point, similar to weight data.
     auto kernel_zp = pack_ptr.orig_weight.q_zero_point() + 128;
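Dynamic linear keeps a per-tensor-only guard for now, matching "Dynamic linear to follow" in the summary. In Python terms, the condition it enforces looks like this (illustrative only):

import torch

w = torch.randn(8, 4)
w_qt = torch.quantize_per_tensor(w, 0.1, 0, torch.qint8)
assert w_qt.qscheme() == torch.per_tensor_affine   # accepted by linear_dynamic

w_qc = torch.quantize_per_channel(
    w, torch.full((8,), 0.1), torch.zeros(8, dtype=torch.long), 0, torch.qint8)
assert w_qc.qscheme() == torch.per_channel_affine  # would trip the new TORCH_CHECK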
3 changes: 0 additions & 3 deletions aten/src/ATen/native/quantized/cpu/qlinear_prepack.cpp
@@ -132,9 +132,6 @@ class QLinearPackWeightInt8 final {
     TORCH_CHECK(
         weight.dim() == 2,
         "quantized::linear_prepack (qnnpack): Weight tensor rank should be == 2");
-    TORCH_CHECK(
-        weight.qscheme() == kPerTensorAffine,
-        "quantized::linear_prepack (qnnpack) only supports Per Tensor Quantization Scheme")

     int64_t rows_w = weight.size(0);
     Tensor bias_fp32;
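Correspondingly, static linear prepack under QNNPACK now accepts per-channel quantized weights as well. A minimal sketch (assumed sizes, guarded on engine availability):

import torch

if 'qnnpack' in torch.backends.quantized.supported_engines:
    torch.backends.quantized.engine = 'qnnpack'
    out_features, in_features = 8, 16
    w = torch.randn(out_features, in_features)
    w_q = torch.quantize_per_channel(
        w, torch.rand(out_features) * 0.1 + 0.01,
        torch.zeros(out_features, dtype=torch.long), 0, torch.qint8)
    packed = torch.ops.quantized.linear_prepack(w_q, torch.randn(out_features))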
1 change: 0 additions & 1 deletion test/quantization/test_quantized_functional.py
@@ -101,7 +101,6 @@ def test_conv2d_api(
         if qengine == 'qnnpack':
             if IS_PPC or TEST_WITH_UBSAN:
                 return
-            use_channelwise = False

         input_feature_map_size = (H, W)
         kernel_size = (kernel_h, kernel_w)
1 change: 0 additions & 1 deletion test/quantization/test_quantized_module.py
@@ -372,7 +372,6 @@ def test_conv2d_api(
         if qengine == 'qnnpack':
             if IS_PPC or TEST_WITH_UBSAN:
                 return
-            use_channelwise = False

         in_channels = in_channels_per_group * groups
         out_channels = out_channels_per_group * groups
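The module-level path these tests now exercise with use_channelwise=True under QNNPACK looks roughly like the following sketch (assumed shapes and output quantization parameters; not the test code itself):

import torch
import torch.nn.quantized as nnq

if 'qnnpack' in torch.backends.quantized.supported_engines:
    torch.backends.quantized.engine = 'qnnpack'
    oc, ic, k = 4, 3, 3
    w_q = torch.quantize_per_channel(
        torch.randn(oc, ic, k, k), torch.rand(oc) * 0.1 + 0.01,
        torch.zeros(oc, dtype=torch.long), 0, torch.qint8)

    m = nnq.Conv2d(ic, oc, k)
    m.set_weight_bias(w_q, torch.randn(oc))
    m.scale, m.zero_point = 0.5, 0  # assumed output quantization params

    x_q = torch.quantize_per_tensor(torch.randn(1, ic, 8, 8), 0.05, 128, torch.quint8)
    y_q = m(x_q)  # runs the conv kernel with channelwise weights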
11 changes: 7 additions & 4 deletions test/quantization/test_quantized_op.py
@@ -1932,7 +1932,6 @@ def test_qlinear(self, batch_size, input_channels, output_channels, use_bias,
             # QNNPACK qlinear is flaky on MACOS. Issue #27326
             if IS_PPC or TEST_WITH_UBSAN or IS_MACOS:
                 return
-            use_channelwise = False
             use_multi_dim_input = False
             # QNNPACK supports uint8 in the kernels. In the op we shift the int8
             # weight values to uint8 to be on par with fbgemm. However, this causes
@@ -2054,7 +2053,6 @@ def test_qlinear_unpack(self, W, use_channelwise, qengine):
         if qengine == 'qnnpack':
             if IS_PPC or TEST_WITH_UBSAN:
                 return
-            use_channelwise = False

         with override_quantized_engine(qengine):
             W, (W_scale, W_zp, torch_type) = W
@@ -2310,13 +2308,19 @@ def test_qconv2d(
             use_channelwise,
             qengine
     ):
+        # Added this to make it more likely to find a failing example.
+        if use_channelwise and qengine == 'qnnpack':
+            use_bias = True
+            groups = 1
+            output_channels_per_group = \
+                2 if output_channels_per_group < 2 else output_channels_per_group
+
         if qengine not in torch.backends.quantized.supported_engines:
             return
         if qengine == 'qnnpack':
             # QNNPACK qconv is flaky on MACOS. Issue #27326
             if IS_PPC or TEST_WITH_UBSAN or IS_MACOS:
                 return
-            use_channelwise = False

         input_channels = input_channels_per_group * groups
         output_channels = output_channels_per_group * groups
@@ -2374,7 +2378,6 @@ def test_qconv_unpack(
         if qengine == 'qnnpack':
             if IS_PPC or TEST_WITH_UBSAN:
                 return
-            channelwise = False

         with override_quantized_engine(qengine):
             qconv_prepack = torch.ops.quantized.conv2d_prepack
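At the op level, the newly enabled channelwise qlinear path looks roughly like this sketch (assumed sizes and output scale/zero point; not taken from the tests):

import torch

if 'qnnpack' in torch.backends.quantized.supported_engines:
    torch.backends.quantized.engine = 'qnnpack'
    oc, ic = 4, 8
    w_q = torch.quantize_per_channel(
        torch.randn(oc, ic), torch.rand(oc) * 0.1 + 0.01,
        torch.zeros(oc, dtype=torch.long), 0, torch.qint8)
    packed = torch.ops.quantized.linear_prepack(w_q, torch.randn(oc))

    x_q = torch.quantize_per_tensor(torch.randn(2, ic), 0.05, 128, torch.quint8)
    y_q = torch.ops.quantized.linear(x_q, packed, 0.5, 0)  # output scale, zero point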
