Commit dbf43d6

jerryzh168 authored and pytorchmergebot committed
[quant][fx] Only do reference module swapping for floating point fused modules (#74231)
Summary:
Pull Request resolved: #74231

Add a check to make sure the weighted module we swap is actually a float fused module. A reference fused module (e.g. the reference version of linear + relu) has the same fused type as the floating point linear + relu, but its linear submodule has a different type, so the outer type alone cannot distinguish the two.

Test Plan: phabricator diff for now; a test case can be added once we know exactly what the problem is.

Reviewed By: andrewor14

Differential Revision: D34888290

fbshipit-source-id: a7f53368a7c17f7d1a82afaa50d14d569b4923df
(cherry picked from commit 458dac9)
1 parent 0471da5 commit dbf43d6
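
To illustrate what the summary describes, here is a minimal sketch (not part of the commit; it only assumes that torch.nn.intrinsic fused modules are Sequential-style containers): the float fused linear + relu and its reference-converted counterpart share the same outer type, so the convert pass has to look at the type of the first submodule.

import torch
import torch.nn.intrinsic as nni

# Float fused linear + relu: the outer type is nni.LinearReLU and the inner
# weighted submodule (index 0 of the Sequential-style container) is still a
# plain float torch.nn.Linear.
fused = nni.LinearReLU(torch.nn.Linear(5, 5), torch.nn.ReLU())
assert type(fused) is nni.LinearReLU
assert isinstance(fused[0], torch.nn.Linear)

# Once the inner linear has been swapped to a reference quantized module
# (for example via an earlier use of the same shared submodule), the outer
# fused type is unchanged, so only the check on fused[0] tells the two apart.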

File tree

2 files changed: +51 -0 lines changed

test/quantization/fx/test_quantize_fx.py

Lines changed: 39 additions & 0 deletions

@@ -3600,6 +3600,45 @@ def forward(self, x):
         ]
         self.checkGraphModuleNodes(m, expected_node_list=node_list)
 
+    @skipIfNoFBGEMM
+    def test_dynamic_with_fusion_multiple_uses(self):
+        """
+        Tests that dynamic quantization APIs work with Linear + Relu fusion
+        """
+        class LinearRelu(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.linear = torch.nn.Linear(5, 5)
+                self.relu = torch.nn.ReLU()
+
+            def forward(self, x):
+                x = self.linear(x)
+                return self.relu(x)
+
+        class M(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.linear_relu = LinearRelu()
+
+            def forward(self, x):
+                x = self.linear_relu(x)
+                x = self.linear_relu(x)
+                return x
+
+        for qconfig in [float16_dynamic_qconfig, default_dynamic_qconfig]:
+            model = M().eval()
+            qconfig_dict = {
+                "": qconfig
+            }
+            m = prepare_fx(model, qconfig_dict)
+            m = convert_fx(m)
+            m(torch.rand(5, 5))
+            node_list = [
+                ns.call_module(nniqd.LinearReLU),
+                ns.call_module(nniqd.LinearReLU),
+            ]
+            self.checkGraphModuleNodes(m, expected_node_list=node_list)
+
     def test_ref_linear_module(self):
         """ Make sure the numerics for models with ref linear module
             matches models with fbgemm/qnnpack module

torch/ao/quantization/fx/convert.py

Lines changed: 12 additions & 0 deletions

@@ -68,6 +68,13 @@
     torch.nn.intrinsic.ConvReLU3d,
 )
 
+FLOAT_WEIGHTED_MODULE_CLASSES = (
+    torch.nn.Linear,
+    torch.nn.Conv1d,
+    torch.nn.Conv2d,
+    torch.nn.Conv3d,
+)
+
 QAT_MODULE_CLASSES = (
     torch.nn.qat.Linear,
     torch.nn.qat.Conv2d,
@@ -746,6 +753,11 @@ def replace_observer_with_dequantize_node(node: Node, graph: Graph):
                     node, modules, model, is_reference, backend_config_dict)
             elif type(modules[node.target]) in set(
                     weighted_module_classes).union(QAT_MODULE_CLASSES).union(FUSED_MODULE_CLASSES):
+                # extra check for fused module classes to make sure they are fused module classes
+                # of target modules
+                if type(modules[node.target]) in FUSED_MODULE_CLASSES and \
+                        type(modules[node.target][0]) not in FLOAT_WEIGHTED_MODULE_CLASSES:
+                    continue
                 convert_weighted_module(
                     node, modules, observed_node_names, quantized_reference_module_mapping, qconfig_map)
             elif type(modules[node.target]) in custom_module_classes:
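
Read in isolation, the guard added above behaves roughly like the following standalone predicate (a hedged sketch only: is_float_fused_module is a made-up helper name, and the fused-class tuple is an assumption based on the diff context ending in ConvReLU3d, not copied from convert.py):

import torch
import torch.nn.intrinsic as nni

FLOAT_WEIGHTED_MODULE_CLASSES = (
    torch.nn.Linear,
    torch.nn.Conv1d,
    torch.nn.Conv2d,
    torch.nn.Conv3d,
)

# Assumed to roughly match FUSED_MODULE_CLASSES in convert.py (the diff
# context only shows that it ends with torch.nn.intrinsic.ConvReLU3d).
FUSED_MODULE_CLASSES = (
    nni.LinearReLU,
    nni.ConvReLU1d,
    nni.ConvReLU2d,
    nni.ConvReLU3d,
)

def is_float_fused_module(module: torch.nn.Module) -> bool:
    # Only fused modules whose first submodule is still a float weighted
    # module should be swapped to the reference quantized fused module;
    # anything else (e.g. an already-converted reference fused module) is
    # skipped by the convert pass.
    return (
        type(module) in FUSED_MODULE_CLASSES
        and type(module[0]) in FLOAT_WEIGHTED_MODULE_CLASSES
    )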
