diff --git a/backends/arm/quantizer/TARGETS b/backends/arm/quantizer/TARGETS
index bbd7322daf5..5027bc87a55 100644
--- a/backends/arm/quantizer/TARGETS
+++ b/backends/arm/quantizer/TARGETS
@@ -1,5 +1,15 @@
 load("@fbcode_macros//build_defs:python_library.bzl", "python_library")
 
+# Exposed through __init__.py
+python_library(
+    name = "quantization_config",
+    srcs = ["quantization_config.py"],
+    deps = [
+        "//caffe2:torch",
+    ],
+)
+
+# Exposed through __init__.py
 python_library(
     name = "arm_quantizer",
     srcs = ["arm_quantizer.py"],
@@ -22,17 +32,19 @@ python_library(
 )
 
 python_library(
-    name = "quantization_config",
-    srcs = ["quantization_config.py"],
+    name = "arm_quantizer_utils",
+    srcs = ["arm_quantizer_utils.py"],
     deps = [
-        "//caffe2:torch",
+        ":quantization_config",
     ],
 )
 
 python_library(
-    name = "arm_quantizer_utils",
-    srcs = ["arm_quantizer_utils.py"],
+    name = "lib",
+    srcs = ["__init__.py"],
     deps = [
+        ":arm_quantizer",
         ":quantization_config",
-    ],
+        ":arm_quantizer_utils",
+    ]
 )
diff --git a/backends/arm/quantizer/__init__.py b/backends/arm/quantizer/__init__.py
index 9743c95a143..d663ca4938f 100644
--- a/backends/arm/quantizer/__init__.py
+++ b/backends/arm/quantizer/__init__.py
@@ -1,4 +1,15 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+
+
+from .quantization_config import QuantizationConfig  # noqa  # usort: skip
+from .arm_quantizer import (  # noqa
+    EthosUQuantizer,
+    get_symmetric_quantization_config,
+    TOSAQuantizer,
+)
+
+# Used in tests
+from .arm_quantizer_utils import is_annotated  # noqa
diff --git a/backends/arm/quantizer/arm_quantizer.py b/backends/arm/quantizer/arm_quantizer.py
index ee08f8e9eec..094819b401e 100644
--- a/backends/arm/quantizer/arm_quantizer.py
+++ b/backends/arm/quantizer/arm_quantizer.py
@@ -19,16 +19,11 @@
 import torch
 from executorch.backends.arm._passes import ArmPassManager
 
-from executorch.backends.arm.quantizer import arm_quantizer_utils
-from executorch.backends.arm.quantizer.arm_quantizer_utils import (  # type: ignore[attr-defined]
-    mark_node_as_annotated,
-)
-from executorch.backends.arm.quantizer.quantization_annotator import (  # type: ignore[import-not-found]
-    annotate_graph,
-)
-
-from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
+from executorch.backends.arm.quantizer import QuantizationConfig
 from executorch.backends.arm.tosa_specification import TosaSpecification
+
+from .arm_quantizer_utils import is_annotated, mark_node_as_annotated
+from .quantization_annotator import annotate_graph
 from executorch.backends.arm.arm_backend import (
     get_tosa_spec,
     is_ethosu,
@@ -337,7 +332,7 @@ def _annotate_io(
     quantization_config: QuantizationConfig,
 ):
     for node in model.graph.nodes:
-        if arm_quantizer_utils.is_annotated(node):
+        if is_annotated(node):
             continue
         if node.op == "placeholder" and len(node.users) > 0:
             _annotate_output_qspec(
diff --git a/backends/arm/quantizer/quantization_annotator.py b/backends/arm/quantizer/quantization_annotator.py
index 8c081e7c00a..5ac747177be 100644
--- a/backends/arm/quantizer/quantization_annotator.py
+++ b/backends/arm/quantizer/quantization_annotator.py
@@ -10,8 +10,7 @@
 import torch
 import torch.fx
 
-from executorch.backends.arm.quantizer import arm_quantizer_utils
-from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
+from executorch.backends.arm.quantizer import QuantizationConfig
 from executorch.backends.arm.tosa_utils import get_node_debug_info
 from torch.ao.quantization.quantizer import QuantizationSpecBase, SharedQuantizationSpec
 from torch.ao.quantization.quantizer.utils import (
@@ -20,6 +19,13 @@
 )
 from torch.fx import Node
 
+from .arm_quantizer_utils import (
+    is_annotated,
+    is_ok_for_quantization,
+    is_output_annotated,
+    mark_node_as_annotated,
+)
+
 logger = logging.getLogger(__name__)
 
 
@@ -69,7 +75,7 @@ def _is_ok_for_quantization(
     """
     # Check output
     if quant_properties.quant_output is not None:
-        if not arm_quantizer_utils.is_ok_for_quantization(node, gm):  # type: ignore[attr-defined]
+        if not is_ok_for_quantization(node, gm):  # type: ignore[attr-defined]
             logger.debug(
                 f"Could not quantize node due to output: "
                 f"{get_node_debug_info(node, gm)}"
@@ -87,7 +93,7 @@
 
         for n_arg in _as_list(node.args[quant_property.index]):
             assert isinstance(n_arg, Node)
-            if not arm_quantizer_utils.is_ok_for_quantization(n_arg, gm):  # type: ignore[attr-defined]
+            if not is_ok_for_quantization(n_arg, gm):  # type: ignore[attr-defined]
                 logger.debug(
                     f'could not quantize node due to input "{node}": '
                     f"{get_node_debug_info(node, gm)}"
@@ -99,7 +105,7 @@
 
 
 def _annotate_input(node: Node, quant_property: _QuantProperty):
-    assert not arm_quantizer_utils.is_annotated(node)
+    assert not is_annotated(node)
     if quant_property.optional and (
         quant_property.index >= len(node.args)
         or node.args[quant_property.index] is None
@@ -114,11 +120,11 @@ def _annotate_input(node: Node, quant_property: _QuantProperty):
         assert isinstance(n_arg, Node)
         _annotate_input_qspec_map(node, n_arg, qspec)
         if quant_property.mark_annotated:
-            arm_quantizer_utils.mark_node_as_annotated(n_arg)  # type: ignore[attr-defined]
+            mark_node_as_annotated(n_arg)  # type: ignore[attr-defined]
 
 
 def _annotate_output(node: Node, quant_property: _QuantProperty):
-    assert not arm_quantizer_utils.is_annotated(node)
+    assert not is_annotated(node)
     assert not quant_property.mark_annotated
     assert not quant_property.optional
     assert quant_property.index == 0, "Only one output annotation supported currently"
@@ -343,7 +349,7 @@ def any_or_hardtanh_min_zero(n: Node):
     elif node.target in _one_to_one_shared_input_or_input_act_qspec:
         input_qspec = (
             SharedQuantizationSpec(node.args[0])  # type: ignore[arg-type]
-            if arm_quantizer_utils.is_output_annotated(node.args[0])  # type: ignore
+            if is_output_annotated(node.args[0])  # type: ignore
             else input_act_qspec
         )
         quant_properties.quant_inputs = [_QuantProperty(0, input_qspec)]  # type: ignore[arg-type]
@@ -396,7 +402,7 @@
         if not isinstance(node.args[0], Node):
             return None
 
-        if not arm_quantizer_utils.is_output_annotated(node.args[0]):  # type: ignore[attr-defined]
+        if not is_output_annotated(node.args[0]):  # type: ignore[attr-defined]
             return None
 
         shared_qspec = SharedQuantizationSpec(node.args[0])
@@ -426,7 +432,7 @@ def annotate_graph(  # type: ignore[return]
         if node.op != "call_function":
             continue
 
-        if arm_quantizer_utils.is_annotated(node):
+        if is_annotated(node):
             continue
 
         if filter_fn is not None and not filter_fn(node):
@@ -442,7 +448,7 @@
         if quant_properties.quant_output is not None:
             _annotate_output(node, quant_properties.quant_output)
 
-        arm_quantizer_utils.mark_node_as_annotated(node)  # type: ignore[attr-defined]
+        mark_node_as_annotated(node)  # type: ignore[attr-defined]
 
         # Quantization does not allow kwargs for some reason.
         # Remove from ops we know have and where we know it does not break anything.
diff --git a/backends/arm/test/TARGETS b/backends/arm/test/TARGETS
index 38a4ec18787..a82d4da6bc2 100644
--- a/backends/arm/test/TARGETS
+++ b/backends/arm/test/TARGETS
@@ -42,7 +42,7 @@ python_library(
         ":common",
         "//executorch/backends/xnnpack/test/tester:tester",
         "//executorch/backends/arm:arm_partitioner",
-        "//executorch/backends/arm/quantizer:arm_quantizer",
+        "//executorch/backends/arm/quantizer:lib",
         "//executorch/backends/arm:tosa_mapping",
         "//executorch/devtools/backend_debug:delegation_info",
         "fbsource//third-party/pypi/tabulate:tabulate",
diff --git a/backends/arm/test/ops/test_expand.py b/backends/arm/test/ops/test_expand.py
index b644e729bb4..cd073bddcc8 100644
--- a/backends/arm/test/ops/test_expand.py
+++ b/backends/arm/test/ops/test_expand.py
@@ -15,7 +15,7 @@
 
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/backends/arm/test/ops/test_hardtanh.py b/backends/arm/test/ops/test_hardtanh.py
index 6742b398ef5..46b44078785 100644
--- a/backends/arm/test/ops/test_hardtanh.py
+++ b/backends/arm/test/ops/test_hardtanh.py
@@ -13,7 +13,7 @@
 
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/backends/arm/test/ops/test_max_pool.py b/backends/arm/test/ops/test_max_pool.py
index 2f3426f2dda..657597f9058 100644
--- a/backends/arm/test/ops/test_max_pool.py
+++ b/backends/arm/test/ops/test_max_pool.py
@@ -12,7 +12,7 @@
 import pytest
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/backends/arm/test/ops/test_permute.py b/backends/arm/test/ops/test_permute.py
index e71b6687865..50db1231b41 100644
--- a/backends/arm/test/ops/test_permute.py
+++ b/backends/arm/test/ops/test_permute.py
@@ -13,7 +13,7 @@
 
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/backends/arm/test/ops/test_relu.py b/backends/arm/test/ops/test_relu.py
index 6977d30f2fa..3fc64c89be1 100644
--- a/backends/arm/test/ops/test_relu.py
+++ b/backends/arm/test/ops/test_relu.py
@@ -10,7 +10,7 @@
 from typing import Tuple
 
 import torch
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/backends/arm/test/ops/test_repeat.py b/backends/arm/test/ops/test_repeat.py
index 7ea4d15120a..da2770cfafe 100644
--- a/backends/arm/test/ops/test_repeat.py
+++ b/backends/arm/test/ops/test_repeat.py
@@ -13,7 +13,7 @@
 
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/backends/arm/test/ops/test_sigmoid_16bit.py b/backends/arm/test/ops/test_sigmoid_16bit.py
index 240000e6973..3cd11699a0a 100644
--- a/backends/arm/test/ops/test_sigmoid_16bit.py
+++ b/backends/arm/test/ops/test_sigmoid_16bit.py
@@ -6,7 +6,7 @@
 import pytest
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     get_symmetric_quantization_config,
     TOSAQuantizer,
 )
diff --git a/backends/arm/test/ops/test_sigmoid_32bit.py b/backends/arm/test/ops/test_sigmoid_32bit.py
index 14808eedaf9..fbfc263a6d0 100644
--- a/backends/arm/test/ops/test_sigmoid_32bit.py
+++ b/backends/arm/test/ops/test_sigmoid_32bit.py
@@ -5,7 +5,7 @@
 import pytest
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import TOSAQuantizer
+from executorch.backends.arm.quantizer import TOSAQuantizer
 from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.test_pipeline import (
diff --git a/backends/arm/test/ops/test_var.py b/backends/arm/test/ops/test_var.py
index 6690c668f94..fb23f24307e 100644
--- a/backends/arm/test/ops/test_var.py
+++ b/backends/arm/test/ops/test_var.py
@@ -11,7 +11,7 @@
 import unittest
 
 import torch
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/backends/arm/test/ops/test_where.py b/backends/arm/test/ops/test_where.py
index dd4f3326f8e..91d616232fa 100644
--- a/backends/arm/test/ops/test_where.py
+++ b/backends/arm/test/ops/test_where.py
@@ -9,7 +9,7 @@
 
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/backends/arm/test/quantizer/test_generic_annotater.py b/backends/arm/test/quantizer/test_generic_annotater.py
index 61be0ccb3e9..275a44583dc 100644
--- a/backends/arm/test/quantizer/test_generic_annotater.py
+++ b/backends/arm/test/quantizer/test_generic_annotater.py
@@ -7,7 +7,7 @@
 import unittest
 
 import torch
-from executorch.backends.arm.quantizer.arm_quantizer_utils import is_annotated
+from executorch.backends.arm.quantizer import is_annotated
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
 from torch.fx.passes.utils.source_matcher_utils import get_source_partitions
diff --git a/backends/arm/test/targets.bzl b/backends/arm/test/targets.bzl
index 9f5bb778e78..1993df0d091 100644
--- a/backends/arm/test/targets.bzl
+++ b/backends/arm/test/targets.bzl
@@ -19,6 +19,11 @@ def define_arm_tests():
         "ops/test_tanh.py",
     ]
 
+    # Quantization
+    test_files += [
+        "quantizer/test_generic_annotater.py",
+    ]
+
     TESTS = {}
 
     for test_file in test_files:
diff --git a/backends/arm/test/tester/arm_tester.py b/backends/arm/test/tester/arm_tester.py
index 6346a53edef..d7434e7149a 100644
--- a/backends/arm/test/tester/arm_tester.py
+++ b/backends/arm/test/tester/arm_tester.py
@@ -27,7 +27,7 @@
     is_tosa,
 )
 from executorch.backends.arm.ethosu_partitioner import EthosUPartitioner
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/backends/arm/test/tester/test_pipeline.py b/backends/arm/test/tester/test_pipeline.py
index cad62c021f6..95d9898c19d 100644
--- a/backends/arm/test/tester/test_pipeline.py
+++ b/backends/arm/test/tester/test_pipeline.py
@@ -8,7 +8,7 @@
 
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/backends/arm/tosa_quant_utils.py b/backends/arm/tosa_quant_utils.py
index 0cfa19eb453..afdbf78422e 100644
--- a/backends/arm/tosa_quant_utils.py
+++ b/backends/arm/tosa_quant_utils.py
@@ -10,8 +10,6 @@
 import math
 from typing import cast, List, NamedTuple, Tuple
 
-import executorch.backends.arm.tosa_mapping
-
 import torch.fx
 import torch.fx.node
 
@@ -234,7 +232,7 @@ def build_rescale(
 
 def build_rescale_to_int32(
     tosa_fb: ts.TosaSerializer,
-    input_arg: executorch.backends.arm.tosa_mapping.TosaArg,
+    input_arg: TosaArg,
     input_zp: int,
     rescale_scale: list[float],
     is_scale32: bool = True,
diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py
index cc54f0bf79e..5b3d39d9f6d 100644
--- a/examples/arm/aot_arm_compiler.py
+++ b/examples/arm/aot_arm_compiler.py
@@ -24,7 +24,7 @@
     is_tosa,
 )
 from executorch.backends.arm.ethosu_partitioner import EthosUPartitioner
-from executorch.backends.arm.quantizer.arm_quantizer import (
+from executorch.backends.arm.quantizer import (
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
diff --git a/examples/arm/ethos_u_minimal_example.ipynb b/examples/arm/ethos_u_minimal_example.ipynb
index d73695e9d48..ab62d0dd2ce 100644
--- a/examples/arm/ethos_u_minimal_example.ipynb
+++ b/examples/arm/ethos_u_minimal_example.ipynb
@@ -80,7 +80,7 @@
    "outputs": [],
    "source": [
     "from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder\n",
-    "from executorch.backends.arm.quantizer.arm_quantizer import (\n",
+    "from executorch.backends.arm.quantizer import (\n",
     "    EthosUQuantizer,\n",
     "    get_symmetric_quantization_config,\n",
     ")\n",
@@ -89,7 +89,7 @@
     "target = \"ethos-u55-128\"\n",
     "\n",
     "# Create a compilation spec describing the target for configuring the quantizer\n",
-    "# Some args are used by the Arm Vela graph compiler later in the example. Refer to Arm Vela documentation for an \n",
+    "# Some args are used by the Arm Vela graph compiler later in the example. Refer to Arm Vela documentation for an\n",
     "# explanation of its flags: https://gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-vela/-/blob/main/OPTIONS.md\n",
     "spec_builder = ArmCompileSpecBuilder().ethosu_compile_spec(\n",
     "    target,\n",
@@ -100,12 +100,12 @@
     "compile_spec = spec_builder.build()\n",
     "\n",
     "# Create and configure quantizer to use a symmetric quantization config globally on all nodes\n",
-    "quantizer = EthosUQuantizer(compile_spec) \n",
+    "quantizer = EthosUQuantizer(compile_spec)\n",
     "operator_config = get_symmetric_quantization_config(is_per_channel=False)\n",
     "quantizer.set_global(operator_config)\n",
     "\n",
     "# Post training quantization\n",
-    "quantized_graph_module = prepare_pt2e(graph_module, quantizer) \n",
+    "quantized_graph_module = prepare_pt2e(graph_module, quantizer)\n",
     "quantized_graph_module(*example_inputs) # Calibrate the graph module with the example input\n",
     "quantized_graph_module = convert_pt2e(quantized_graph_module)\n",
     "\n",
@@ -128,8 +128,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import subprocess \n",
-    "import os \n",
+    "import subprocess\n",
+    "import os\n",
     "\n",
     "# Setup paths\n",
     "cwd_dir = os.getcwd()\n",
@@ -170,9 +170,9 @@
     "    to_edge_transform_and_lower,\n",
     ")\n",
     "from executorch.extension.export_util.utils import save_pte_program\n",
-    "import platform \n",
+    "import platform\n",
     "\n",
-    "# Create partitioner from compile spec \n",
+    "# Create partitioner from compile spec\n",
     "partitioner = EthosUPartitioner(compile_spec)\n",
     "\n",
     "# Lower the exported program to the Ethos-U backend\n",
@@ -185,8 +185,8 @@
     "    )\n",
     "\n",
     "# Load quantization ops library\n",
-    "os_aot_lib_names = {\"Darwin\" : \"libquantized_ops_aot_lib.dylib\", \n",
-    "                    \"Linux\" : \"libquantized_ops_aot_lib.so\", \n",
+    "os_aot_lib_names = {\"Darwin\" : \"libquantized_ops_aot_lib.dylib\",\n",
+    "                    \"Linux\" : \"libquantized_ops_aot_lib.so\",\n",
     "                    \"Windows\": \"libquantized_ops_aot_lib.dll\"}\n",
     "aot_lib_name = os_aot_lib_names[platform.system()]\n",
     "\n",
@@ -226,7 +226,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Build executorch \n",
+    "# Build executorch\n",
     "subprocess.run(os.path.join(script_dir, \"build_executorch.sh\"), shell=True, cwd=et_dir)\n",
     "\n",
     "# Build portable kernels\n",
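Usage note: the sketch below illustrates the import surface this change re-exports from `executorch.backends.arm.quantizer`, mirroring the updated tests and the Ethos-U notebook. It is not part of the diff; the toy module, the input shape, and the `system_config`/`memory_mode` values are illustrative assumptions, while the import path and the prepare/convert flow come from the changes above.

```python
# Minimal sketch (assumptions noted in comments): exercising the re-exported quantizer API.
import torch

from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder
from executorch.backends.arm.quantizer import (
    EthosUQuantizer,
    get_symmetric_quantization_config,
)
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e


class ToyModel(torch.nn.Module):  # placeholder model for illustration only
    def forward(self, x):
        return torch.nn.functional.relu(x)


example_inputs = (torch.randn(1, 8),)  # assumed input shape
graph_module = torch.export.export_for_training(ToyModel(), example_inputs).module()

# Compile spec for the quantizer; the target and flags are assumed example values.
compile_spec = (
    ArmCompileSpecBuilder()
    .ethosu_compile_spec(
        "ethos-u55-128",
        system_config="Ethos_U55_High_End_Embedded",
        memory_mode="Shared_Sram",
    )
    .build()
)

# Quantizer classes are now imported from the package root instead of
# executorch.backends.arm.quantizer.arm_quantizer.
quantizer = EthosUQuantizer(compile_spec)
quantizer.set_global(get_symmetric_quantization_config(is_per_channel=False))

prepared = prepare_pt2e(graph_module, quantizer)
prepared(*example_inputs)  # calibrate with the example input
quantized_graph_module = convert_pt2e(prepared)
```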