Commit 9d12efd

cccclai authored and facebook-github-bot committed
set per channel quant for weight (#3709)
Summary: Pull Request resolved: #3709

As title; verified with the stories model, and the accuracy is better.

Reviewed By: kirklandsign
Differential Revision: D57655227
fbshipit-source-id: 6257aaafb26f1a91c749c4fc1e2efca609e07935
1 parent f42942a commit 9d12efd

File tree

1 file changed: 2 additions, 0 deletions


examples/models/llama2/lib/quant_lib.py

Lines changed: 2 additions & 0 deletions
@@ -158,6 +158,8 @@ def get_qnn_quantizer(args):
         backend == "qnn"
     ), f"The quantization config is for backend {backend} instead of qnn."
     qnn_quantizer = QnnQuantizer()
+    qnn_quantizer.set_per_channel_conv_quant(enable=True)
+    qnn_quantizer.set_per_channel_linear_quant(enable=True)
     # more custom quantization are supported including 16a4w etc. default to 8bit quantized
     custom_annotations = ()
     if quant_config == "8a8w":
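As a side note on why this two-line change can improve accuracy: per-tensor quantization shares one scale across all output channels of a weight, so channels with small-magnitude weights lose most of their precision, while per-channel quantization picks a scale per output channel. The sketch below illustrates that effect with a hypothetical `fake_quant` helper; it is not the actual QnnQuantizer implementation, just a minimal demonstration of the idea.

```python
def fake_quant(row, scale):
    """Symmetric int8 quantize/dequantize of one weight row with a given scale."""
    return [max(-128, min(127, round(w / scale))) * scale for w in row]

# Two output channels with very different magnitudes.
weights = [
    [0.01, -0.008, 0.005, -0.002],  # small-magnitude channel
    [1.0, -0.75, 0.5, -0.25],       # large-magnitude channel
]

# Per-tensor: one scale derived from the global max.
global_scale = max(abs(w) for row in weights for w in row) / 127
err_tensor = sum(
    abs(w - q)
    for row in weights
    for w, q in zip(row, fake_quant(row, global_scale))
)

# Per-channel: one scale per row (output channel).
err_channel = sum(
    abs(w - q)
    for row in weights
    for w, q in zip(row, fake_quant(row, max(abs(w) for w in row) / 127))
)

# The shared scale crushes the small channel, so per-channel error is lower.
assert err_channel < err_tensor
```

The large-magnitude channel quantizes identically either way; the entire error difference comes from the small channel, which is essentially rounded to zero under the shared per-tensor scale.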
