Enabled per channel quantized static linear/conv
Summary:
Enable channelwise quantized tests for qlinear and qconv.
Dynamic linear to follow.

Test Plan:
pytest test/quantization/test_quantized.py
pytest test/quantization/test_quantized_module.py
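For context, channelwise (per-channel) weight quantization differs from per-tensor quantization only in carrying one scale and zero point per output channel. A minimal sketch with made-up shapes and scales (standard torch API, not part of this commit):

import torch

w = torch.randn(8, 4)  # hypothetical linear weight: out_features x in_features

# Per-tensor: a single scale and zero point for the whole tensor.
w_qt = torch.quantize_per_tensor(w, 0.1, 0, torch.qint8)

# Per-channel ("channelwise"): one scale and zero point per output channel (axis 0).
scales = torch.rand(8) * 0.1 + 0.01
zero_points = torch.zeros(8, dtype=torch.long)
w_qc = torch.quantize_per_channel(w, scales, zero_points, 0, torch.qint8)

print(w_qt.qscheme())  # torch.per_tensor_affine
print(w_qc.qscheme())  # torch.per_channel_affine
print(w_qc.q_per_channel_scales())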

Reviewers:

Subscribers:

Tasks:

Tags:

ghstack-source-id: 3ad2892fb90e634d48f56d860a942e98b0c70639
Pull Request resolved: pytorch/pytorch#37622
kimishpatel committed May 7, 2020
1 parent 7c73070 commit b34e8cd
Showing 7 changed files with 24 additions and 15 deletions.
15 changes: 13 additions & 2 deletions aten/src/ATen/native/quantized/cpu/qconv.cpp
@@ -536,9 +536,20 @@ at::Tensor PackedConvWeightsQnnp<kSpatialDim>::apply_impl(
     for (int i = 0; i < wt_numel; ++i) {
       qnnp_w_data[i] = static_cast<c10::quint8>(w_data[i] + 128);
     }
+    at::Tensor bias;
     // Original bias was float, so we requantize it here.
-    auto bias = at::quantize_per_tensor(
-        bias_fp32, weight_scales_data[0] * act_input_scale, 0, c10::kQInt32);
+    if (is_per_channel) {
+      at::Tensor bias_quant_scales = weight_contig.q_per_channel_scales() * act_input_scale;
+      at::Tensor bias_zp = at::zeros(bias_quant_scales.sizes(), c10::kInt);
+      bias = at::native::quantize_per_channel_cpu(
+          bias_fp32, bias_quant_scales, bias_zp, 0, c10::kQInt32);
+    } else {
+      bias = at::native::quantize_per_tensor(
+          bias_fp32,
+          weight_contig.q_scale() * act_input_scale,
+          0,
+          c10::kQInt32);
+    }

     conv_p = qnnpack::conv_param_t(
         {kernel_w, kernel_h},
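A rough Python analogue of the bias handling added above, using the public torch API instead of the internal at::native calls; the scales and values are assumed for illustration. For per-channel weights, each bias element is requantized to int32 with scale = weight_scale[c] * act_input_scale and zero point 0:

import torch

act_input_scale = 0.05                     # assumed activation (input) scale
w_scales = torch.tensor([0.1, 0.2, 0.05])  # assumed per-channel weight scales
bias_fp32 = torch.tensor([0.3, -0.7, 1.2])

# Bias scale per channel = weight scale per channel * activation scale, zero point 0.
bias_scales = w_scales * act_input_scale
bias_zps = torch.zeros(3, dtype=torch.long)
bias_q = torch.quantize_per_channel(bias_fp32, bias_scales, bias_zps, 0, torch.qint32)
print(bias_q.int_repr())  # the int32 bias values a backend such as QNNPACK consumes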
4 changes: 0 additions & 4 deletions aten/src/ATen/native/quantized/cpu/qconv_prepack.cpp
@@ -168,10 +168,6 @@ c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> PackedConvWeightsQnnp<kSpa
       weight.ndimension() == 4,
       "quantized::conv2d_prepack (qnnpack): Weights are expected to have 4 "
       "dimensions");
-  TORCH_CHECK(
-      weight.qscheme() == c10::kPerTensorAffine,
-      "quantized::conv2d_prepack (qnnpack): only supports Per Tensor "
-      "Quantization Scheme")
   TORCH_CHECK(
       stride.size() == 2,
       "quantized::conv2d_prepack (qnnpack): 2D convolution only");
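With the per-tensor-only check removed, conv prepack under QNNPACK can accept a per-channel quantized weight. A hedged sketch (assumed shapes and scales, guarded on engine availability):

import torch

if 'qnnpack' in torch.backends.quantized.supported_engines:
    torch.backends.quantized.engine = 'qnnpack'
    oc, ic, kh, kw = 4, 3, 3, 3
    w = torch.randn(oc, ic, kh, kw)
    w_q = torch.quantize_per_channel(
        w, torch.rand(oc) * 0.1 + 0.01, torch.zeros(oc, dtype=torch.long),
        0, torch.qint8)
    bias = torch.randn(oc)
    # weight, bias, stride, padding, dilation, groups
    packed = torch.ops.quantized.conv2d_prepack(
        w_q, bias, [1, 1], [0, 0], [1, 1], 1)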
4 changes: 4 additions & 0 deletions aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp
@@ -223,6 +223,10 @@ class QLinearDynamicInt8 final {

     auto& pack_ptr =
         cpp_custom_type_hack::cast<PackedLinearWeightsQnnp>(packed_weight);
+    TORCH_CHECK(
+        pack_ptr.orig_weight.qscheme() == kPerTensorAffine,
+        "quantized::linear_dynamic (qnnpack) only supports "
+        "Per Tensor Quantization Scheme");
     auto packB = pack_ptr.w.get();
     // Adjust weight zero point, similar to weight data.
     auto kernel_zp = pack_ptr.orig_weight.q_zero_point() + 128;
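Dynamic linear keeps a per-tensor-only guard for now, matching "Dynamic linear to follow" in the summary. In Python terms, the condition it enforces looks like this (illustrative only):

import torch

w = torch.randn(8, 4)
w_qt = torch.quantize_per_tensor(w, 0.1, 0, torch.qint8)
assert w_qt.qscheme() == torch.per_tensor_affine   # accepted by linear_dynamic

w_qc = torch.quantize_per_channel(
    w, torch.full((8,), 0.1), torch.zeros(8, dtype=torch.long), 0, torch.qint8)
assert w_qc.qscheme() == torch.per_channel_affine  # would trip the new TORCH_CHECK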
3 changes: 0 additions & 3 deletions aten/src/ATen/native/quantized/cpu/qlinear_prepack.cpp
@@ -132,9 +132,6 @@ class QLinearPackWeightInt8 final {
     TORCH_CHECK(
         weight.dim() == 2,
         "quantized::linear_prepack (qnnpack): Weight tensor rank should be == 2");
-    TORCH_CHECK(
-        weight.qscheme() == kPerTensorAffine,
-        "quantized::linear_prepack (qnnpack) only supports Per Tensor Quantization Scheme")

     int64_t rows_w = weight.size(0);
     Tensor bias_fp32;
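Correspondingly, static linear prepack under QNNPACK now accepts per-channel quantized weights as well. A minimal sketch (assumed sizes, guarded on engine availability):

import torch

if 'qnnpack' in torch.backends.quantized.supported_engines:
    torch.backends.quantized.engine = 'qnnpack'
    out_features, in_features = 8, 16
    w = torch.randn(out_features, in_features)
    w_q = torch.quantize_per_channel(
        w, torch.rand(out_features) * 0.1 + 0.01,
        torch.zeros(out_features, dtype=torch.long), 0, torch.qint8)
    packed = torch.ops.quantized.linear_prepack(w_q, torch.randn(out_features))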
1 change: 0 additions & 1 deletion test/quantization/test_quantized_functional.py
@@ -101,7 +101,6 @@ def test_conv2d_api(
         if qengine == 'qnnpack':
             if IS_PPC or TEST_WITH_UBSAN:
                 return
-            use_channelwise = False

         input_feature_map_size = (H, W)
         kernel_size = (kernel_h, kernel_w)
1 change: 0 additions & 1 deletion test/quantization/test_quantized_module.py
@@ -372,7 +372,6 @@ def test_conv2d_api(
         if qengine == 'qnnpack':
             if IS_PPC or TEST_WITH_UBSAN:
                 return
-            use_channelwise = False

         in_channels = in_channels_per_group * groups
         out_channels = out_channels_per_group * groups
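The module-level path these tests now exercise with use_channelwise=True under QNNPACK looks roughly like the following sketch (assumed shapes and output quantization parameters; not the test code itself):

import torch
import torch.nn.quantized as nnq

if 'qnnpack' in torch.backends.quantized.supported_engines:
    torch.backends.quantized.engine = 'qnnpack'
    oc, ic, k = 4, 3, 3
    w_q = torch.quantize_per_channel(
        torch.randn(oc, ic, k, k), torch.rand(oc) * 0.1 + 0.01,
        torch.zeros(oc, dtype=torch.long), 0, torch.qint8)

    m = nnq.Conv2d(ic, oc, k)
    m.set_weight_bias(w_q, torch.randn(oc))
    m.scale, m.zero_point = 0.5, 0  # assumed output quantization params

    x_q = torch.quantize_per_tensor(torch.randn(1, ic, 8, 8), 0.05, 128, torch.quint8)
    y_q = m(x_q)  # runs the conv kernel with channelwise weights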
11 changes: 7 additions & 4 deletions test/quantization/test_quantized_op.py
@@ -1932,7 +1932,6 @@ def test_qlinear(self, batch_size, input_channels, output_channels, use_bias,
             # QNNPACK qlinear is flaky on MACOS. Issue #27326
             if IS_PPC or TEST_WITH_UBSAN or IS_MACOS:
                 return
-            use_channelwise = False
             use_multi_dim_input = False
             # QNNPACK supports uint8 in the kernels. In the op we shift the int8
             # weight values to uint8 to be on par with fbgemm. However, this causes
@@ -2054,7 +2053,6 @@ def test_qlinear_unpack(self, W, use_channelwise, qengine):
         if qengine == 'qnnpack':
             if IS_PPC or TEST_WITH_UBSAN:
                 return
-            use_channelwise = False

         with override_quantized_engine(qengine):
             W, (W_scale, W_zp, torch_type) = W
@@ -2310,13 +2308,19 @@ def test_qconv2d(
             use_channelwise,
             qengine
     ):
+        # Added this to make it more likely to find a failing example.
+        if use_channelwise and qengine == 'qnnpack':
+            use_bias = True
+            groups = 1
+            output_channels_per_group = \
+                2 if output_channels_per_group < 2 else output_channels_per_group
+
         if qengine not in torch.backends.quantized.supported_engines:
             return
         if qengine == 'qnnpack':
             # QNNPACK qconv is flaky on MACOS. Issue #27326
             if IS_PPC or TEST_WITH_UBSAN or IS_MACOS:
                 return
-            use_channelwise = False

         input_channels = input_channels_per_group * groups
         output_channels = output_channels_per_group * groups
@@ -2374,7 +2378,6 @@ def test_qconv_unpack(
         if qengine == 'qnnpack':
             if IS_PPC or TEST_WITH_UBSAN:
                 return
-            channelwise = False

         with override_quantized_engine(qengine):
             qconv_prepack = torch.ops.quantized.conv2d_prepack
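At the op level, the newly enabled channelwise qlinear path looks roughly like this sketch (assumed sizes and output scale/zero point; not taken from the tests):

import torch

if 'qnnpack' in torch.backends.quantized.supported_engines:
    torch.backends.quantized.engine = 'qnnpack'
    oc, ic = 4, 8
    w_q = torch.quantize_per_channel(
        torch.randn(oc, ic), torch.rand(oc) * 0.1 + 0.01,
        torch.zeros(oc, dtype=torch.long), 0, torch.qint8)
    packed = torch.ops.quantized.linear_prepack(w_q, torch.randn(oc))

    x_q = torch.quantize_per_tensor(torch.randn(2, ic), 0.05, 128, torch.quint8)
    y_q = torch.ops.quantized.linear(x_q, packed, 0.5, 0)  # output scale, zero point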
