Commit ba3b8ce

[quant] fix int16 quantization scale in conv weight
Summary:
fix int16 quantization scale in conv weight

Test Plan:
python3 test/test_quantization.py TestQuantizeEagerOps.test_int16_reference_module

Reviewers:

Subscribers:

Tasks:

Tags:

ghstack-source-id: 8466547
Pull Request resolved: #74665
1 parent 7f996b8 commit ba3b8ce
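
For context: reference quantized modules route weight handling on the weight dtype, and torch.qint32 was missing from the dtype lists in torch/nn/quantized/_reference/modules/utils.py. Weights quantized to an int16 range (stored as qint32 with int16 quant_min/quant_max) therefore fell into the placeholder branch and were given weight_scale = 1.0 instead of the observer-computed scale. The sketch below hand-computes the scale a per_tensor_symmetric observer should report for such a weight; it uses only the standard symmetric-quantization formula, and the tensor values are illustrative.

    import torch

    # Hand-computed symmetric scale for an int16 range (illustrative values).
    # This mirrors the formula a per_tensor_symmetric MinMax-style observer uses:
    #     scale = max(-min_val, max_val) / ((quant_max - quant_min) / 2)
    quant_min, quant_max = -(2 ** 15), (2 ** 15) - 1

    w = torch.randn(1, 1, 1, 1)             # stand-in for the ConvTranspose2d weight
    min_val, max_val = w.min(), w.max()
    max_abs = torch.max(-min_val, max_val)  # symmetric: larger magnitude wins
    scale = max_abs / ((quant_max - quant_min) / 2)
    zero_point = 0                          # symmetric signed dtypes keep zero_point at 0

    print(scale.item(), zero_point)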

2 files changed: +74 −4 lines changed

test/quantization/eager/test_quantize_eager_ptq.py

Lines changed: 71 additions & 0 deletions
@@ -199,6 +199,77 @@ def test_linear(self):
             (16, 5)
         )
 
+    @override_qengines
+    def test_int16_reference_module(self):
+
+        class RefM(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.conv = nn.ConvTranspose2d(1, 1, 1)
+                self.quant1 = QuantStub()
+                self.dequant1 = DeQuantStub()
+                self.quant2 = QuantStub()
+                self.dequant2 = DeQuantStub()
+
+            def forward(self, x):
+                x = self.quant1(x)
+                x = self.dequant1(x)
+                x = self.conv(x)
+                x = self.quant2(x)
+                x = self.dequant2(x)
+                return x
+
+
+        input_size = (16, 1, 10, 10)
+        data = torch.randn(*input_size, dtype=torch.float)
+
+        original_ref_m = RefM()
+        rand_w = torch.randn_like(original_ref_m.conv.weight)
+        rand_b = torch.randn_like(original_ref_m.conv.bias)
+        original_ref_m.conv.weight = torch.nn.Parameter(rand_w, requires_grad=False)
+        original_ref_m.conv.bias = torch.nn.Parameter(rand_b, requires_grad=False)
+
+        qengine = torch.backends.quantized.engine
+        if qengine not in supported_qengines:
+            return
+        from torch.ao.quantization.observer import MovingAverageMinMaxObserver
+
+        weight_obs = MovingAverageMinMaxObserver.with_args(
+            dtype=torch.qint32,
+            # set qmin and qmax to represent qint16
+            quant_min=-1 * (2 ** 15),
+            quant_max=(2 ** 15) - 1,
+            qscheme=torch.per_tensor_symmetric,
+        )
+        act_obs = MovingAverageMinMaxObserver.with_args(
+            dtype=torch.qint32,
+            quant_min=-1 * (2 ** 15),
+            quant_max=(2 ** 15) - 1,
+        )
+        custom_qconfig = QConfig(activation=act_obs, weight=weight_obs)
+
+        # quantize the reference model
+        original_ref_m.eval()
+        original_ref_m.qconfig = custom_qconfig
+
+        ref_m = prepare(original_ref_m)
+        # calibration
+        ref_m(torch.randn(*input_size, dtype=torch.float))
+
+        ref_m = convert(ref_m, is_reference=True)
+
+        myobs = MovingAverageMinMaxObserver(averaging_constant=0.5,
+                                            dtype=torch.qint32,
+                                            # set qmin and qmax to represent qint16
+                                            quant_min=-1 * (2 ** 15),
+                                            quant_max=(2 ** 15) - 1,
+                                            qscheme=torch.per_tensor_symmetric,
+                                            )
+        result = myobs(rand_w)
+        qparams = myobs.calculate_qparams()
+        self.assertEqual(ref_m.conv.weight_scale, qparams[0])
+
+
     def _test_activation_op_impl(
             self, float_module_class, quantized_module_class, extra_module_kwargs):
         """ Implementation for testing common activation ops like leaky relu

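A condensed, standalone version of the flow the new test exercises may help readers unfamiliar with eager-mode quantization: prepare() attaches observers according to the module's qconfig, a calibration forward pass feeds them data, and convert(..., is_reference=True) swaps in reference modules carrying weight_scale/weight_zero_point buffers. This is a sketch distilled from the test above, assuming the same torch.ao.quantization APIs the test file imports:

    import torch
    import torch.nn as nn
    from torch.ao.quantization import (
        DeQuantStub, QConfig, QuantStub, convert, prepare,
    )
    from torch.ao.quantization.observer import MovingAverageMinMaxObserver

    class M(nn.Module):
        def __init__(self):
            super().__init__()
            self.quant = QuantStub()
            self.conv = nn.ConvTranspose2d(1, 1, 1)
            self.dequant = DeQuantStub()

        def forward(self, x):
            return self.dequant(self.conv(self.quant(x)))

    # int16 range emulated on top of the qint32 storage dtype, as in the test
    weight_obs = MovingAverageMinMaxObserver.with_args(
        dtype=torch.qint32, quant_min=-(2 ** 15), quant_max=(2 ** 15) - 1,
        qscheme=torch.per_tensor_symmetric)
    act_obs = MovingAverageMinMaxObserver.with_args(
        dtype=torch.qint32, quant_min=-(2 ** 15), quant_max=(2 ** 15) - 1)

    m = M().eval()
    m.qconfig = QConfig(activation=act_obs, weight=weight_obs)
    m = prepare(m)
    m(torch.randn(16, 1, 10, 10))    # calibration pass feeds the observers
    m = convert(m, is_reference=True)

    # with this fix the reference conv reports the observed scale,
    # not the 1.0 placeholder it fell back to before
    print(m.conv.weight_scale)
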
torch/nn/quantized/_reference/modules/utils.py

Lines changed: 3 additions & 4 deletions
@@ -16,7 +16,7 @@ def _init_weight_qparams(self, weight_qparams, device):
         None, torch.per_tensor_affine, torch.per_channel_affine,
         torch.per_channel_affine_float_qparams], \
         Exception(f"qscheme: {self.weight_qscheme} is not support in reference quantized {self._get_name()}")
-    if self.weight_dtype in [torch.quint8, torch.qint8, torch.quint4x2]:
+    if self.weight_dtype in [torch.quint8, torch.qint8, torch.quint4x2, torch.qint32]:
         zero_point_dtype = weight_qparams["zero_point"].dtype if \
             isinstance(weight_qparams["zero_point"], torch.Tensor) else \
             torch.int
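
For orientation: _init_weight_qparams keys its behavior off weight_qparams["dtype"]. Dtypes on the allow-list get real weight_scale/weight_zero_point buffers registered from the dict; anything else falls through to placeholders. A hypothetical qparams dict that now takes the buffer-registering path (the "scale" key is inferred from context, not shown in this hunk; values are illustrative):

    import torch

    weight_qparams = {
        "qscheme": torch.per_tensor_affine,
        "dtype": torch.qint32,   # now matches the allow-list above
        "scale": 0.0035,         # observer-computed value, no longer dropped
        "zero_point": 0,
    }
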
@@ -35,13 +35,12 @@ def _init_weight_qparams(self, weight_qparams, device):
         self.register_buffer(
             "weight_axis", torch.tensor(0, dtype=torch.int, device=device))
     else:
-        # added for TorchScriptability, not used
+        # added for TorchScriptability, and for torch.float
         self.register_buffer("weight_scale", torch.tensor(1.0, dtype=torch.float, device=device))
         self.register_buffer("weight_zero_point", torch.tensor(0, dtype=torch.int, device=device))
         self.register_buffer(
             "weight_axis", torch.tensor(0, dtype=torch.int, device=device))
 
-
 def get_weight(self):
     """
     Fake quantize (quantize and dequantize) the weight with
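
The else branch above is the one qint32 weights used to hit: it registers fixed placeholder buffers regardless of what the observer saw, which is exactly where the bad scale came from. A quick illustration with a bare nn.Module (not this file's code):

    import torch
    import torch.nn as nn

    m = nn.Module()
    # the placeholder branch registers constants, regardless of observed stats
    m.register_buffer("weight_scale", torch.tensor(1.0, dtype=torch.float))
    m.register_buffer("weight_zero_point", torch.tensor(0, dtype=torch.int))
    print(m.weight_scale)   # tensor(1.) -- the scale every qint32 weight got pre-fix
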
@@ -105,7 +104,7 @@ def _quantize_weight(
         weight = torch.quantize_per_tensor(weight, weight_scale, weight_zero_point, weight_dtype)
         return weight
     elif weight_qscheme in [torch.per_channel_affine, torch.per_channel_affine_float_qparams]:
-        if weight_dtype in [torch.quint8, torch.qint8, torch.quint4x2]:
+        if weight_dtype in [torch.quint8, torch.qint8, torch.quint4x2, torch.qint32]:
             weight = torch.quantize_per_channel(
                 weight, weight_scale,
                 weight_zero_point, weight_axis.item(), weight_dtype)  # type: ignore[arg-type]
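
The same allow-list guards _quantize_weight, so with torch.qint32 added, int16-range weights are actually quantized with their computed qparams instead of being passed through as float. A minimal sketch of the per-tensor path using plain torch ops (scale and zero_point values are illustrative):

    import torch

    w = torch.randn(1, 1, 1, 1)        # stand-in weight tensor
    scale, zero_point = 0.0035, 0      # illustrative qparams
    qw = torch.quantize_per_tensor(w, scale, zero_point, torch.qint32)

    # quantize-then-dequantize round trip, as get_weight() does above
    print(qw.int_repr())               # int32 storage holding int16-range values
    print(qw.dequantize())             # fake-quantized float weight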
