diff --git a/backends/arm/quantizer/TARGETS b/backends/arm/quantizer/TARGETS
index bbd7322daf5..5027bc87a55 100644
--- a/backends/arm/quantizer/TARGETS
+++ b/backends/arm/quantizer/TARGETS
@@ -1,5 +1,15 @@
 load("@fbcode_macros//build_defs:python_library.bzl", "python_library")
 
+# Exposed through __init__.py
+python_library(
+    name = "quantization_config",
+    srcs = ["quantization_config.py"],
+    deps = [
+        "//caffe2:torch",
+    ],
+)
+
+# Exposed through __init__.py
 python_library(
     name = "arm_quantizer",
     srcs = ["arm_quantizer.py"],
@@ -22,17 +32,19 @@ python_library(
 )
 
 python_library(
-    name = "quantization_config",
-    srcs = ["quantization_config.py"],
+    name = "arm_quantizer_utils",
+    srcs = ["arm_quantizer_utils.py"],
     deps = [
-        "//caffe2:torch",
+        ":quantization_config",
     ],
 )
 
 python_library(
-    name = "arm_quantizer_utils",
-    srcs = ["arm_quantizer_utils.py"],
+    name = "lib",
+    srcs = ["__init__.py"],
     deps = [
+        ":arm_quantizer",
         ":quantization_config",
-    ],
+        ":arm_quantizer_utils",
+    ]
 )
diff --git a/backends/arm/quantizer/__init__.py b/backends/arm/quantizer/__init__.py
index 9743c95a143..d663ca4938f 100644
--- a/backends/arm/quantizer/__init__.py
+++ b/backends/arm/quantizer/__init__.py
@@ -1,4 +1,15 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+
+
+from .quantization_config import QuantizationConfig  # noqa  # usort: skip
+from .arm_quantizer import (  # noqa
+    EthosUQuantizer,
+    get_symmetric_quantization_config,
+    TOSAQuantizer,
+)
+
+# Used in tests
+from .arm_quantizer_utils import is_annotated  # noqa
diff --git a/backends/arm/quantizer/arm_quantizer.py b/backends/arm/quantizer/arm_quantizer.py
index ee08f8e9eec..094819b401e 100644
--- a/backends/arm/quantizer/arm_quantizer.py
+++ b/backends/arm/quantizer/arm_quantizer.py
@@ -19,16 +19,11 @@
 import torch
 from executorch.backends.arm._passes import ArmPassManager
 
-from executorch.backends.arm.quantizer import arm_quantizer_utils
-from executorch.backends.arm.quantizer.arm_quantizer_utils import (  # type: ignore[attr-defined]
-    mark_node_as_annotated,
-)
-from executorch.backends.arm.quantizer.quantization_annotator import (  # type: ignore[import-not-found]
-    annotate_graph,
-)
-
-from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
+from executorch.backends.arm.quantizer import QuantizationConfig
 from executorch.backends.arm.tosa_specification import TosaSpecification
+
+from .arm_quantizer_utils import is_annotated, mark_node_as_annotated
+from .quantization_annotator import annotate_graph
 from executorch.backends.arm.arm_backend import (
     get_tosa_spec,
     is_ethosu,
@@ -337,7 +332,7 @@ def _annotate_io(
     quantization_config: QuantizationConfig,
 ):
     for node in model.graph.nodes:
-        if arm_quantizer_utils.is_annotated(node):
+        if is_annotated(node):
             continue
         if node.op == "placeholder" and len(node.users) > 0:
             _annotate_output_qspec(
diff --git a/backends/arm/quantizer/quantization_annotator.py b/backends/arm/quantizer/quantization_annotator.py
index 8c081e7c00a..5ac747177be 100644
--- a/backends/arm/quantizer/quantization_annotator.py
+++ b/backends/arm/quantizer/quantization_annotator.py
@@ -10,8 +10,7 @@
 import torch
 import torch.fx
 
-from executorch.backends.arm.quantizer import arm_quantizer_utils
-from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
+from executorch.backends.arm.quantizer import QuantizationConfig
 from executorch.backends.arm.tosa_utils import get_node_debug_info
 from torch.ao.quantization.quantizer import QuantizationSpecBase, SharedQuantizationSpec
 from torch.ao.quantization.quantizer.utils import (
@@ -20,6 +19,13 @@
 )
 from torch.fx import Node
 
+from .arm_quantizer_utils import (
+    is_annotated,
+    is_ok_for_quantization,
+    is_output_annotated,
+    mark_node_as_annotated,
+)
+
 logger = logging.getLogger(__name__)
 
 
@@ -69,7 +75,7 @@ def _is_ok_for_quantization(
     """
     # Check output
     if quant_properties.quant_output is not None:
-        if not arm_quantizer_utils.is_ok_for_quantization(node, gm):  # type: ignore[attr-defined]
+        if not is_ok_for_quantization(node, gm):  # type: ignore[attr-defined]
             logger.debug(
                 f"Could not quantize node due to output: "
                 f"{get_node_debug_info(node, gm)}"
@@ -87,7 +93,7 @@
 
         for n_arg in _as_list(node.args[quant_property.index]):
             assert isinstance(n_arg, Node)
-            if not arm_quantizer_utils.is_ok_for_quantization(n_arg, gm):  # type: ignore[attr-defined]
+            if not is_ok_for_quantization(n_arg, gm):  # type: ignore[attr-defined]
                 logger.debug(
                     f'could not quantize node due to input "{node}": '
                     f"{get_node_debug_info(node, gm)}"
@@ -99,7 +105,7 @@
 
 
 def _annotate_input(node: Node, quant_property: _QuantProperty):
-    assert not arm_quantizer_utils.is_annotated(node)
+    assert not is_annotated(node)
     if quant_property.optional and (
         quant_property.index >= len(node.args)
         or node.args[quant_property.index] is None
@@ -114,11 +120,11 @@ def _annotate_input(node: Node, quant_property: _QuantProperty):
         assert isinstance(n_arg, Node)
         _annotate_input_qspec_map(node, n_arg, qspec)
         if quant_property.mark_annotated:
-            arm_quantizer_utils.mark_node_as_annotated(n_arg)  # type: ignore[attr-defined]
+            mark_node_as_annotated(n_arg)  # type: ignore[attr-defined]
 
 
 def _annotate_output(node: Node, quant_property: _QuantProperty):
-    assert not arm_quantizer_utils.is_annotated(node)
+    assert not is_annotated(node)
     assert not quant_property.mark_annotated
     assert not quant_property.optional
     assert quant_property.index == 0, "Only one output annotation supported currently"
@@ -343,7 +349,7 @@ def any_or_hardtanh_min_zero(n: Node):
     elif node.target in _one_to_one_shared_input_or_input_act_qspec:
         input_qspec = (
             SharedQuantizationSpec(node.args[0])  # type: ignore[arg-type]
-            if arm_quantizer_utils.is_output_annotated(node.args[0])  # type: ignore
+            if is_output_annotated(node.args[0])  # type: ignore
             else input_act_qspec
         )
         quant_properties.quant_inputs = [_QuantProperty(0, input_qspec)]  # type: ignore[arg-type]
@@ -396,7 +402,7 @@
         if not isinstance(node.args[0], Node):
             return None
 
-        if not arm_quantizer_utils.is_output_annotated(node.args[0]):  # type: ignore[attr-defined]
+        if not is_output_annotated(node.args[0]):  # type: ignore[attr-defined]
             return None
 
         shared_qspec = SharedQuantizationSpec(node.args[0])
@@ -426,7 +432,7 @@ def annotate_graph(  # type: ignore[return]
         if node.op != "call_function":
             continue
 
-        if arm_quantizer_utils.is_annotated(node):
+        if is_annotated(node):
             continue
 
         if filter_fn is not None and not filter_fn(node):
@@ -442,7 +448,7 @@
         if quant_properties.quant_output is not None:
             _annotate_output(node, quant_properties.quant_output)
 
-        arm_quantizer_utils.mark_node_as_annotated(node)  # type: ignore[attr-defined]
+        mark_node_as_annotated(node)  # type: ignore[attr-defined]
 
         # Quantization does not allow kwargs for some reason.
         # Remove from ops we know have and where we know it does not break anything.
diff --git a/backends/arm/test/TARGETS b/backends/arm/test/TARGETS
index 38a4ec18787..a82d4da6bc2 100644
--- a/backends/arm/test/TARGETS
+++ b/backends/arm/test/TARGETS
@@ -42,7 +42,7 @@ python_library(
         ":common",
         "//executorch/backends/xnnpack/test/tester:tester",
         "//executorch/backends/arm:arm_partitioner",
-        "//executorch/backends/arm/quantizer:arm_quantizer",
+        "//executorch/backends/arm/quantizer:lib",
         "//executorch/backends/arm:tosa_mapping",
         "//executorch/devtools/backend_debug:delegation_info",
         "fbsource//third-party/pypi/tabulate:tabulate",
diff --git a/backends/arm/test/ops/test_expand.py b/backends/arm/test/ops/test_expand.py
index b644e729bb4..cd073bddcc8 100644
--- a/backends/arm/test/ops/test_expand.py
+++ b/backends/arm/test/ops/test_expand.py
@@ -15,7 +15,7 @@
 
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/backends/arm/test/ops/test_hardtanh.py b/backends/arm/test/ops/test_hardtanh.py
index 6742b398ef5..46b44078785 100644
--- a/backends/arm/test/ops/test_hardtanh.py
+++ b/backends/arm/test/ops/test_hardtanh.py
@@ -13,7 +13,7 @@
 
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/backends/arm/test/ops/test_max_pool.py b/backends/arm/test/ops/test_max_pool.py
index 2f3426f2dda..657597f9058 100644
--- a/backends/arm/test/ops/test_max_pool.py
+++ b/backends/arm/test/ops/test_max_pool.py
@@ -12,7 +12,7 @@
 import pytest
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/backends/arm/test/ops/test_permute.py b/backends/arm/test/ops/test_permute.py
index e71b6687865..50db1231b41 100644
--- a/backends/arm/test/ops/test_permute.py
+++ b/backends/arm/test/ops/test_permute.py
@@ -13,7 +13,7 @@
 
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/backends/arm/test/ops/test_relu.py b/backends/arm/test/ops/test_relu.py
index 6977d30f2fa..3fc64c89be1 100644
--- a/backends/arm/test/ops/test_relu.py
+++ b/backends/arm/test/ops/test_relu.py
@@ -10,7 +10,7 @@
 from typing import Tuple
 
 import torch
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/backends/arm/test/ops/test_repeat.py b/backends/arm/test/ops/test_repeat.py
index 7ea4d15120a..da2770cfafe 100644
--- a/backends/arm/test/ops/test_repeat.py
+++ b/backends/arm/test/ops/test_repeat.py
@@ -13,7 +13,7 @@
 
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/backends/arm/test/ops/test_sigmoid_16bit.py b/backends/arm/test/ops/test_sigmoid_16bit.py
index 240000e6973..3cd11699a0a 100644
--- a/backends/arm/test/ops/test_sigmoid_16bit.py
+++ b/backends/arm/test/ops/test_sigmoid_16bit.py
@@ -6,7 +6,7 @@
 import pytest
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     get_symmetric_quantization_config,
     TOSAQuantizer,
 )
diff --git a/backends/arm/test/ops/test_sigmoid_32bit.py b/backends/arm/test/ops/test_sigmoid_32bit.py
index 14808eedaf9..fbfc263a6d0 100644
--- a/backends/arm/test/ops/test_sigmoid_32bit.py
+++ b/backends/arm/test/ops/test_sigmoid_32bit.py
@@ -5,7 +5,7 @@
 import pytest
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import TOSAQuantizer
+from executorch.backends.arm.quantizer import TOSAQuantizer
 from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.test_pipeline import (
diff --git a/backends/arm/test/ops/test_var.py b/backends/arm/test/ops/test_var.py
index 6690c668f94..fb23f24307e 100644
--- a/backends/arm/test/ops/test_var.py
+++ b/backends/arm/test/ops/test_var.py
@@ -11,7 +11,7 @@
 import unittest
 
 import torch
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/backends/arm/test/ops/test_where.py b/backends/arm/test/ops/test_where.py
index dd4f3326f8e..91d616232fa 100644
--- a/backends/arm/test/ops/test_where.py
+++ b/backends/arm/test/ops/test_where.py
@@ -9,7 +9,7 @@
 
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/backends/arm/test/quantizer/test_generic_annotater.py b/backends/arm/test/quantizer/test_generic_annotater.py
index 61be0ccb3e9..275a44583dc 100644
--- a/backends/arm/test/quantizer/test_generic_annotater.py
+++ b/backends/arm/test/quantizer/test_generic_annotater.py
@@ -7,7 +7,7 @@
 import unittest
 
 import torch
-from executorch.backends.arm.quantizer.arm_quantizer_utils import is_annotated
+from executorch.backends.arm.quantizer import is_annotated
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
 from torch.fx.passes.utils.source_matcher_utils import get_source_partitions
diff --git a/backends/arm/test/targets.bzl b/backends/arm/test/targets.bzl
index 9f5bb778e78..1993df0d091 100644
--- a/backends/arm/test/targets.bzl
+++ b/backends/arm/test/targets.bzl
@@ -19,6 +19,11 @@ def define_arm_tests():
         "ops/test_tanh.py",
     ]
 
+    # Quantization
+    test_files += [
+        "quantizer/test_generic_annotater.py",
+    ]
+
     TESTS = {}
 
     for test_file in test_files:
diff --git a/backends/arm/test/tester/arm_tester.py b/backends/arm/test/tester/arm_tester.py
index 6346a53edef..d7434e7149a 100644
--- a/backends/arm/test/tester/arm_tester.py
+++ b/backends/arm/test/tester/arm_tester.py
@@ -27,7 +27,7 @@
     is_tosa,
 )
 from executorch.backends.arm.ethosu_partitioner import EthosUPartitioner
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/backends/arm/test/tester/test_pipeline.py b/backends/arm/test/tester/test_pipeline.py
index cad62c021f6..95d9898c19d 100644
--- a/backends/arm/test/tester/test_pipeline.py
+++ b/backends/arm/test/tester/test_pipeline.py
@@ -8,7 +8,7 @@
 
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/backends/arm/tosa_quant_utils.py b/backends/arm/tosa_quant_utils.py
index 0cfa19eb453..afdbf78422e 100644
--- a/backends/arm/tosa_quant_utils.py
+++ b/backends/arm/tosa_quant_utils.py
@@ -10,8 +10,6 @@
 import math
 from typing import cast, List, NamedTuple, Tuple
 
-import executorch.backends.arm.tosa_mapping
-
 import torch.fx
 import torch.fx.node
 
@@ -234,7 +232,7 @@ def build_rescale(
 
 def build_rescale_to_int32(
     tosa_fb: ts.TosaSerializer,
-    input_arg: executorch.backends.arm.tosa_mapping.TosaArg,
+    input_arg: TosaArg,
     input_zp: int,
     rescale_scale: list[float],
     is_scale32: bool = True,
diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py
index cc54f0bf79e..5b3d39d9f6d 100644
--- a/examples/arm/aot_arm_compiler.py
+++ b/examples/arm/aot_arm_compiler.py
@@ -24,7 +24,7 @@
     is_tosa,
 )
 from executorch.backends.arm.ethosu_partitioner import EthosUPartitioner
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/examples/arm/ethos_u_minimal_example.ipynb b/examples/arm/ethos_u_minimal_example.ipynb
index d73695e9d48..ab62d0dd2ce 100644
--- a/examples/arm/ethos_u_minimal_example.ipynb
+++ b/examples/arm/ethos_u_minimal_example.ipynb
@@ -80,7 +80,7 @@
    "outputs": [],
    "source": [
     "from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder\n",
-    "from executorch.backends.arm.quantizer.arm_quantizer import (\n",
+    "from executorch.backends.arm.quantizer import (\n",
     "    EthosUQuantizer,\n",
     "    get_symmetric_quantization_config,\n",
     ")\n",
@@ -89,7 +89,7 @@
     "target = \"ethos-u55-128\"\n",
     "\n",
     "# Create a compilation spec describing the target for configuring the quantizer\n",
-    "# Some args are used by the Arm Vela graph compiler later in the example. Refer to Arm Vela documentation for an \n",
+    "# Some args are used by the Arm Vela graph compiler later in the example. Refer to Arm Vela documentation for an\n",
     "# explanation of its flags: https://gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-vela/-/blob/main/OPTIONS.md\n",
     "spec_builder = ArmCompileSpecBuilder().ethosu_compile_spec(\n",
     "    target,\n",
@@ -100,12 +100,12 @@
     "compile_spec = spec_builder.build()\n",
     "\n",
     "# Create and configure quantizer to use a symmetric quantization config globally on all nodes\n",
-    "quantizer = EthosUQuantizer(compile_spec) \n",
+    "quantizer = EthosUQuantizer(compile_spec)\n",
     "operator_config = get_symmetric_quantization_config(is_per_channel=False)\n",
     "quantizer.set_global(operator_config)\n",
     "\n",
     "# Post training quantization\n",
-    "quantized_graph_module = prepare_pt2e(graph_module, quantizer) \n",
+    "quantized_graph_module = prepare_pt2e(graph_module, quantizer)\n",
     "quantized_graph_module(*example_inputs) # Calibrate the graph module with the example input\n",
     "quantized_graph_module = convert_pt2e(quantized_graph_module)\n",
     "\n",
@@ -128,8 +128,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import subprocess \n",
-    "import os \n",
+    "import subprocess\n",
+    "import os\n",
     "\n",
     "# Setup paths\n",
     "cwd_dir = os.getcwd()\n",
@@ -170,9 +170,9 @@
     "    to_edge_transform_and_lower,\n",
     ")\n",
     "from executorch.extension.export_util.utils import save_pte_program\n",
-    "import platform \n",
+    "import platform\n",
     "\n",
-    "# Create partitioner from compile spec \n",
+    "# Create partitioner from compile spec\n",
     "partitioner = EthosUPartitioner(compile_spec)\n",
     "\n",
     "# Lower the exported program to the Ethos-U backend\n",
@@ -185,8 +185,8 @@
     "    )\n",
     "\n",
     "# Load quantization ops library\n",
-    "os_aot_lib_names = {\"Darwin\" : \"libquantized_ops_aot_lib.dylib\", \n",
-    "                    \"Linux\" : \"libquantized_ops_aot_lib.so\", \n",
+    "os_aot_lib_names = {\"Darwin\" : \"libquantized_ops_aot_lib.dylib\",\n",
+    "                    \"Linux\" : \"libquantized_ops_aot_lib.so\",\n",
     "                    \"Windows\": \"libquantized_ops_aot_lib.dll\"}\n",
     "aot_lib_name = os_aot_lib_names[platform.system()]\n",
     "\n",
@@ -226,7 +226,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Build executorch \n",
+    "# Build executorch\n",
     "subprocess.run(os.path.join(script_dir, \"build_executorch.sh\"), shell=True, cwd=et_dir)\n",
     "\n",
     "# Build portable kernels\n",
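Usage note: the sketch below illustrates the import surface this change re-exports from `executorch.backends.arm.quantizer`, mirroring the updated tests and the Ethos-U notebook. It is not part of the diff; the toy module, the input shape, and the `system_config`/`memory_mode` values are illustrative assumptions, while the import path and the prepare/convert flow come from the changes above.

```python
# Minimal sketch (assumptions noted in comments): exercising the re-exported quantizer API.
import torch

from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder
from executorch.backends.arm.quantizer import (
    EthosUQuantizer,
    get_symmetric_quantization_config,
)
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e


class ToyModel(torch.nn.Module):  # placeholder model for illustration only
    def forward(self, x):
        return torch.nn.functional.relu(x)


example_inputs = (torch.randn(1, 8),)  # assumed input shape
graph_module = torch.export.export_for_training(ToyModel(), example_inputs).module()

# Compile spec for the quantizer; the target and flags are assumed example values.
compile_spec = (
    ArmCompileSpecBuilder()
    .ethosu_compile_spec(
        "ethos-u55-128",
        system_config="Ethos_U55_High_End_Embedded",
        memory_mode="Shared_Sram",
    )
    .build()
)

# Quantizer classes are now imported from the package root instead of
# executorch.backends.arm.quantizer.arm_quantizer.
quantizer = EthosUQuantizer(compile_spec)
quantizer.set_global(get_symmetric_quantization_config(is_per_channel=False))

prepared = prepare_pt2e(graph_module, quantizer)
prepared(*example_inputs)  # calibrate with the example input
quantized_graph_module = convert_pt2e(prepared)
```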