Commit ed5cdb7

HDCharles authored and facebook-github-bot committed
[ao][sparsity] make sparsity and PTQ compose (pytorch#74845)
Summary:
Pull Request resolved: pytorch#74845

This PR adds support for the quantization flow to detect parametrized modules and match them using their original module types. This mainly involved using the new type_before_parametrizations function rather than type to check for module matching.

Test Plan: python test/test_ao_sparsity.py TestComposability

Imported from OSS

Reviewed By: jerryzh168

Differential Revision: D35240274

fbshipit-source-id: 7294d89c9c2e069e51d8b9bafa45c15f92bed124
1 parent 8b8ed7b commit ed5cdb7
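
To illustrate the matching problem this change addresses, here is a minimal sketch, not taken from the PR: MaskParametrization is a made-up stand-in for the sparsifier's weight parametrization, and the snippet assumes a PyTorch build that already contains this commit. Registering a parametrization swaps the module's class to a dynamically generated Parametrized* subclass, so matching on type() no longer finds nn.Linear, while type_before_parametrizations still does.

# Minimal sketch; MaskParametrization is a made-up stand-in for the sparsifier's
# mask parametrization, and this assumes a PyTorch build that includes this commit.
import torch
from torch import nn
from torch.nn.utils import parametrize

class MaskParametrization(nn.Module):
    def __init__(self, mask):
        super().__init__()
        self.register_buffer("mask", mask)

    def forward(self, weight):
        # Zero out masked weights, the way a sparsity mask would.
        return weight * self.mask

linear = nn.Linear(4, 4)
parametrize.register_parametrization(linear, "weight", MaskParametrization(torch.ones(4, 4)))

# The class is now a dynamically generated ParametrizedLinear, so plain type()
# no longer matches mapping keys that are keyed on nn.Linear.
print(type(linear) is nn.Linear)                                      # False
print(parametrize.type_before_parametrizations(linear) is nn.Linear)  # True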

File tree

6 files changed: +136 -12 lines changed
test/ao/sparsity/test_composability.py

Lines changed: 94 additions & 0 deletions
@@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+# Owner(s): ["module: unknown"]
+
+
+import logging
+
+import torch
+import torch.quantization as tq
+from torch import nn
+from torch.ao import sparsity
+from torch.testing._internal.common_utils import TestCase
+
+logging.basicConfig(
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO
+)
+
+sparse_defaults = {
+    "sparsity_level": 0.8,
+    "sparse_block_shape": (1, 4),
+    "zeros_per_block": 4,
+}
+
+
+class TestComposability(TestCase):
+    def _get_model_and_sparsifier_and_sparse_config(self):
+        model = nn.Sequential(
+            nn.Linear(4, 4),  # 0
+            nn.ReLU(),
+            nn.Linear(4, 4),  # 2
+            nn.ReLU(),
+            tq.QuantStub(),
+            nn.Linear(4, 4),  # 5
+            nn.Identity(),
+            # nn.ReLU(), not testing fusion yet
+            tq.DeQuantStub(),
+        )
+        model[5].qconfig = torch.ao.quantization.get_default_qconfig("fbgemm")
+        model[4].qconfig = torch.ao.quantization.get_default_qconfig("fbgemm")
+
+        sparsifier = sparsity.WeightNormSparsifier(**sparse_defaults)
+
+        sparse_config = [
+            {
+                "module": model[5],
+                "sparsity_level": 0.7,
+                "sparse_block_shape": (1, 4),
+                "zeros_per_block": 4,
+            },
+            model[0],
+        ]
+        return model, sparsifier, sparse_config
+
+    def _check_parametrizations_and_observers(self, model):
+        self.assertTrue(hasattr(model[0], "parametrizations"))
+        self.assertTrue(hasattr(model[5], "parametrizations"))
+        self.assertTrue(hasattr(model[5], "activation_post_process"))
+
+    def _squash_mask_calibrate_and_convert(self, model, sparsifier, input):
+        sparsifier.step()
+        sparsifier.squash_mask()
+        model(input)
+        tq.convert(model, inplace=True)
+
+    def test_q_prep_before_s_prep(self):
+        (
+            mod,
+            sparsifier,
+            sparse_config,
+        ) = self._get_model_and_sparsifier_and_sparse_config()
+
+        tq.prepare(mod, inplace=True)
+        sparsifier.prepare(mod, config=sparse_config)
+        self._check_parametrizations_and_observers(mod)
+        self._squash_mask_calibrate_and_convert(
+            mod, sparsifier, torch.randn(1, 4, 4, 4)
+        )
+        self.assertTrue(isinstance(mod[5], torch.nn.quantized.Linear))
+        self.assertEqual(mod(torch.randn(1, 4, 4, 4)).shape, torch.Size([1, 4, 4, 4]))
+
+    def test_s_prep_before_q_prep(self):
+        (
+            mod,
+            sparsifier,
+            sparse_config,
+        ) = self._get_model_and_sparsifier_and_sparse_config()
+
+        sparsifier.prepare(mod, config=sparse_config)
+        torch.quantization.prepare(mod, inplace=True)
+        self._check_parametrizations_and_observers(mod)
+        self._squash_mask_calibrate_and_convert(
+            mod, sparsifier, torch.randn(1, 4, 4, 4)
+        )
+        self.assertTrue(isinstance(mod[5], torch.nn.quantized.Linear))
+        self.assertEqual(mod(torch.randn(1, 4, 4, 4)).shape, torch.Size([1, 4, 4, 4]))
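
For reference, here is a condensed sketch of the flow the test above exercises. It is illustrative only: the toy model, qconfig choice, and sparsifier settings are made up and not taken from the PR, and it assumes a PyTorch build that includes this commit.

# Condensed, illustrative usage sketch of the sparsity + post-training-quantization
# composition; model, data, and settings are made up for illustration.
import torch
import torch.quantization as tq
from torch import nn
from torch.ao import sparsity

model = nn.Sequential(tq.QuantStub(), nn.Linear(4, 4), tq.DeQuantStub())
model[0].qconfig = tq.get_default_qconfig("fbgemm")
model[1].qconfig = tq.get_default_qconfig("fbgemm")

sparsifier = sparsity.WeightNormSparsifier(
    sparsity_level=0.8, sparse_block_shape=(1, 4), zeros_per_block=4
)

# After this change the two prepare calls compose in either order.
sparsifier.prepare(model, config=[model[1]])
tq.prepare(model, inplace=True)

sparsifier.step()                # compute the sparsity masks
sparsifier.squash_mask()         # fold the masks into the weights
model(torch.randn(1, 4))         # calibrate the observers
tq.convert(model, inplace=True)  # model[1] becomes a quantized Linear with sparsified weights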

test/test_ao_sparsity.py

Lines changed: 3 additions & 0 deletions
@@ -20,5 +20,8 @@
 # Scheduler
 from ao.sparsity.test_scheduler import TestScheduler  # noqa: F401
 
+# Composability
+from ao.sparsity.test_composability import TestComposability  # noqa: F401
+
 if __name__ == '__main__':
     run_tests()

torch/ao/quantization/quantization_mappings.py

Lines changed: 2 additions & 1 deletion
@@ -23,6 +23,7 @@
     default_symmetric_fixed_qparams_fake_quant,
 )
 from torch.ao.quantization.utils import get_combined_dict
+from torch.nn.utils.parametrize import type_before_parametrizations
 
 # Default map for swapping float module to reference quantized modules
 DEFAULT_REFERENCE_STATIC_QUANT_MODULE_MAPPINGS : Dict[Callable, Any] = {
@@ -306,7 +307,7 @@ def _get_special_act_post_process(module: torch.nn.Module) -> Optional[Callable]
     input: torch.nn.Sigmoid
     output: default_affine_fixed_qparam_fake_quant
     """
-    return DEFAULT_MODULE_TO_ACT_POST_PROCESS.get(type(module), None)
+    return DEFAULT_MODULE_TO_ACT_POST_PROCESS.get(type_before_parametrizations(module), None)
 
 def _has_special_act_post_process(module: torch.nn.Module) -> bool:
     return module.training and type(module) in DEFAULT_MODULE_TO_ACT_POST_PROCESS

torch/ao/quantization/quantize.py

Lines changed: 11 additions & 10 deletions
@@ -17,7 +17,7 @@
     _has_special_act_post_process,
     _get_special_act_post_process,
 )
-from .utils import get_qparam_dict
+from .utils import get_qparam_dict, has_no_children_ignoring_parametrizations
 from torch.ao.quantization.stubs import DeQuantStub, QuantWrapper
 from torch.ao.quantization.qconfig import (
     add_module_to_qconfig_obs_ctr,
@@ -26,6 +26,7 @@
     float_qparams_weight_only_qconfig,
     float_qparams_weight_only_qconfig_4bit,
     activation_is_memoryless)
+from torch.nn.utils.parametrize import type_before_parametrizations
 
 def is_activation_post_process(module):
     return (isinstance(module, torch.ao.quantization.ObserverBase) or
@@ -170,9 +171,9 @@ def insert_activation_post_process(m, special_act_post_process=None):
 
     for name, child in module.named_children():
         # TODO remove Dropout special after codebase stable
-        if type(child) in [nn.Dropout]:
+        if type_before_parametrizations(child) in [nn.Dropout]:
             continue
-        elif type(child) in [nnq.FloatFunctional, nnq.QFunctional]:
+        elif type_before_parametrizations(child) in [nnq.FloatFunctional, nnq.QFunctional]:
             if needs_observation(child):
                 child.activation_post_process = get_activation_post_process(child.qconfig, device)
         elif isinstance(child, _FusedModule):
@@ -182,23 +183,23 @@ def insert_activation_post_process(m, special_act_post_process=None):
         elif _has_special_act_post_process(child):
             special_act_post_process = _get_special_act_post_process(child)
             insert_activation_post_process(child, special_act_post_process)
-        elif non_leaf_module_list is not None and type(child) in non_leaf_module_list:
+        elif non_leaf_module_list is not None and type_before_parametrizations(child) in non_leaf_module_list:
            if needs_observation(child):
                insert_activation_post_process(child)
-        elif needs_observation(child) and type(child) in custom_module_class_mapping:
-            observed_child = custom_module_class_mapping[type(child)].from_float(child)
+        elif needs_observation(child) and type_before_parametrizations(child) in custom_module_class_mapping:
+            observed_child = custom_module_class_mapping[type_before_parametrizations(child)].from_float(child)
             setattr(module, name, observed_child)
             # TODO: These are the modules that cannot be observed
             # Once there are more, we should move them to a separate list
-            if custom_module_class_mapping[type(child)] not in no_observer_set():
+            if custom_module_class_mapping[type_before_parametrizations(child)] not in no_observer_set():
                 insert_activation_post_process(observed_child)
         else:
             add_observer_(child, qconfig_propagation_list, non_leaf_module_list, device, custom_module_class_mapping)
 
     # Insert observers only for leaf nodes, note that this observer is for
     # the output of the module, for input QuantStub will observe them
-    if len(module._modules) == 0 and not isinstance(module, torch.nn.Sequential) \
-            and type(module) in qconfig_propagation_list:
+    if has_no_children_ignoring_parametrizations(module) and not isinstance(module, torch.nn.Sequential) \
+            and type_before_parametrizations(module) in qconfig_propagation_list:
         insert_activation_post_process(module)
 
 def get_unique_devices_(module):
@@ -220,7 +221,7 @@ def add_quant_dequant(module):
     wraps the input module, the latter case only happens when the input
     module is a leaf module and we want to quantize it.
     """
-    if len(module._modules) == 0 and hasattr(module, 'qconfig') and module.qconfig:
+    if has_no_children_ignoring_parametrizations(module) and hasattr(module, 'qconfig') and module.qconfig:
         return QuantWrapper(module)
 
     for name, child in module.named_children():

torch/ao/quantization/utils.py

Lines changed: 14 additions & 0 deletions
@@ -6,6 +6,7 @@
 import torch
 from torch.ao.quantization.quant_type import QuantType, quant_type_to_str
 from typing import Tuple, Any, Union, Callable
+from torch.nn.utils.parametrize import is_parametrized
 
 # Type for fusion patterns, it can be more complicated than the following actually,
 # see pattern.md for docs
@@ -356,3 +357,16 @@ def _parent_name(target):
         return '', r[0]
     else:
         return r[0], r[1]
+
+def has_no_children_ignoring_parametrizations(module):
+    """
+    Checks whether module._modules is empty or, if the module
+    is parametrized, whether module._modules contains only
+    the 'parametrizations' module.
+    """
+    if len(module._modules) == 0:
+        return True
+    elif is_parametrized(module):
+        return len(module._modules) == 1 and 'parametrizations' in module._modules
+    else:
+        return False
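
A short sketch of what this helper changes for leaf detection, illustrative only: Scale is a made-up parametrization, and the import assumes a PyTorch build that includes this commit. Once a module is parametrized it gains a 'parametrizations' child, so the old len(module._modules) == 0 check stops treating it as a leaf, while the new helper still does.

# Illustrative only; Scale is a made-up parametrization and the helper import
# assumes a PyTorch build that includes this change.
import torch
from torch import nn
from torch.nn.utils import parametrize
from torch.ao.quantization.utils import has_no_children_ignoring_parametrizations

class Scale(nn.Module):
    def forward(self, weight):
        return weight * 0.5

linear = nn.Linear(4, 4)
print(len(linear._modules) == 0)                          # True: a plain Linear has no child modules
print(has_no_children_ignoring_parametrizations(linear))  # True

parametrize.register_parametrization(linear, "weight", Scale())
print(len(linear._modules) == 0)                          # False: a 'parametrizations' child was added
print(has_no_children_ignoring_parametrizations(linear))  # True: still treated as a leaf for observer insertion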

torch/nn/utils/parametrize.py

Lines changed: 12 additions & 1 deletion
@@ -573,7 +573,6 @@ def is_parametrized(module: Module, tensor_name: Optional[str] = None) -> bool:
     else:
         return tensor_name in parametrizations
 
-
 def remove_parametrizations(
     module: Module, tensor_name: str, leave_parametrized: bool = True
 ) -> Module:
@@ -644,3 +643,15 @@ def remove_parametrizations(
     orig_cls = module.__class__.__bases__[0]
     module.__class__ = orig_cls
     return module
+
+def type_before_parametrizations(module: Module) -> type:
+    r"""Returns the module type before parametrizations were applied,
+    or the module's current type if it is not parametrized.
+
+    Args:
+        module (nn.Module): module to get type of
+    """
+    if is_parametrized(module):
+        return module.__class__.__bases__[0]
+    else:
+        return type(module)
