Arm backend: Add check for unsupported dtypes on Ethos-U55

Erik-Lundell · Erik-Lundell · commit 7889c0fcbe43 · 2025-04-01T14:51:13.000+02:00
Move all Ethos-U55 support checks into a single file.

Signed-off-by: Erik Lundell <erik.lundell@arm.com>
Change-Id: Ib6444abdbe1cc15d7ec1a91efa15362022f57895
diff --git a/backends/arm/operator_support/__init__.py b/backends/arm/operator_support/__init__.py
@@ -7,6 +7,7 @@
 
 from . import (  # noqa
     convolution_support,
+    ethos_u55_support,
     minmax_support,
     pool_2d_support,
     reduce_sum_support,
diff --git a/backends/arm/operator_support/ethos_u55_support.py b/backends/arm/operator_support/ethos_u55_support.py
@@ -0,0 +1,176 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+import typing
+
+import torch
+import torch.fx as fx
+from executorch.backends.arm._passes.arm_pass_utils import get_first_fake_tensor
+from executorch.backends.arm._passes.insert_table_ops import TableOps
+from executorch.exir.backend.utils import WhyNoPartitionReporter
+
+from executorch.exir.dialects._ops import ops as exir_ops
+from torch.fx.passes.operator_support import OperatorSupportBase
+
+
+class EthosU55DtypeSupport(OperatorSupportBase):
+    """Rejects nodes whose dtype fails the per-operator dtype rules in `is_node_supported`."""
+
+    def __init__(self, reporter: WhyNoPartitionReporter):
+        super().__init__()
+        self.reporter = reporter
+
+    targeted_ops_i8_i16_i32 = [
+        exir_ops.edge.aten.cat.default,
+        exir_ops.edge.aten.repeat.default,
+        exir_ops.edge.aten.constant_pad_nd.default,
+        exir_ops.edge.aten.view.default,
+        exir_ops.edge.aten.permute.default,
+    ]
+
+    target_ops_i8 = tuple(TableOps.included_ops())
+
+    def _try_determine_dtype(self, node: fx.Node) -> torch.dtype | None:
+        """Attempt to figure out the quantized data type of node. On failure, return None."""
+        dtype = get_first_fake_tensor(node).dtype
+        if not dtype.is_floating_point:
+            return dtype
+
+        if (
+            node.target
+            is exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default
+        ):
+            return get_first_fake_tensor(node.all_input_nodes[0]).dtype
+
+        # Only the first user is inspected; with no users there is nothing to look at.
+        q_node = next(iter(node.users), None)
+        quantize_op = exir_ops.edge.quantized_decomposed.quantize_per_tensor.default
+        if q_node is not None and q_node.target is quantize_op:
+            return typing.cast(torch.dtype, q_node.args[-1])
+
+        # We can't easily figure out dtype, return None
+        return None
+
+    def is_node_supported(  # noqa: C901
+        self, submodules: typing.Mapping[str, torch.nn.Module], node: fx.Node
+    ) -> bool:
+        """Return False, after reporting the reason, if node fails a dtype rule.
+
+        A node whose dtype cannot be determined, or whose operator has no rule
+        here, is accepted. `submodules` is not used.
+        """
+        dtype = self._try_determine_dtype(node)
+        if dtype is None:
+            # If we couldn't determine dtype, just return ok.
+            return True
+
+        if node.target in self.targeted_ops_i8_i16_i32:
+            if dtype not in (torch.int8, torch.int16, torch.int32):
+                self.reporter.report_reject(
+                    node, f"Unsupported dtype {dtype} (Supports i8, i16, i32)."
+                )
+                return False
+
+        if node.target in self.target_ops_i8:
+            if dtype not in (torch.int8,):
+                self.reporter.report_reject(
+                    node, f"Unsupported dtype {dtype} (Supports i8)."
+                )
+                return False
+
+        if node.target == exir_ops.edge.aten.convolution.default:
+            ifm, weight = node.all_input_nodes[0:2]
+            ifm_dtype = self._try_determine_dtype(ifm)
+            if ifm_dtype is not None and ifm_dtype not in (torch.int8, torch.int16):
+                self.reporter.report_reject(
+                    node, f"Unsupported input dtype {ifm_dtype} (Supports i8, i16)."
+                )
+                return False
+            weight_dtype = self._try_determine_dtype(weight)
+            if weight_dtype is not None and weight_dtype not in (torch.int8,):
+                self.reporter.report_reject(
+                    node, f"Unsupported weight dtype {weight_dtype} (Supports i8)."
+                )
+                return False
+            if len(node.all_input_nodes) > 2:
+                bias = node.all_input_nodes[2]
+                bias_dtype = self._try_determine_dtype(bias)
+                if bias_dtype is not None and bias_dtype not in (torch.int32,):
+                    self.reporter.report_reject(
+                        node, f"Unsupported bias dtype {bias_dtype} (Supports i32)."
+                    )
+                    return False
+
+        if node.target in (
+            exir_ops.edge.aten.mm.default,
+            exir_ops.edge.aten.bmm.default,
+        ):
+            for input_node in node.all_input_nodes:
+                input_dtype = self._try_determine_dtype(input_node)
+                if input_dtype is not None and input_dtype != torch.int8:
+                    self.reporter.report_reject(
+                        input_node,
+                        f"Input {input_node.name} has unsupported dtype {input_dtype} (Supports i8).",
+                    )
+                    return False
+
+        return True
+
+
+class EthosU55NotSupported(OperatorSupportBase):
+    """
+    Certain operators are not supported on U55. These are listed in `unsupported_ops`.
+    Where it is not obvious, the trailing comment names the unsupported TOSA operator that the aten operator maps to.
+    For unimplemented operators, this is the anticipated mapping, and it might be incorrect.
+    """
+
+    unsupported_ops = [
+        exir_ops.edge.aten.any.default,  # REDUCE_ANY
+        exir_ops.edge.aten.any.dim,  # REDUCE_ANY
+        exir_ops.edge.aten.any.dims,  # REDUCE_ANY
+        exir_ops.edge.aten.bitwise_and.Tensor,
+        exir_ops.edge.aten.bitwise_or.Tensor,
+        exir_ops.edge.aten.bitwise_xor.Tensor,
+        exir_ops.edge.aten.bitwise_not,  # NOTE(review): no overload named, unlike other entries - confirm it matches node.target
+        exir_ops.edge.aten.logical_and.default,
+        exir_ops.edge.aten.logical_or.default,
+        exir_ops.edge.aten.logical_xor.default,
+        exir_ops.edge.aten.logical_not.default,
+        exir_ops.edge.aten.amax.default,  # REDUCE_MAX
+        exir_ops.edge.aten.amin.default,  # REDUCE_MIN
+        exir_ops.edge.aten.eq.Tensor,
+        exir_ops.edge.aten.eq.Scalar,
+        exir_ops.edge.aten.ge.Tensor,
+        exir_ops.edge.aten.gt.Tensor,
+        exir_ops.edge.aten.le.Tensor,
+        exir_ops.edge.aten.lt.Tensor,
+        exir_ops.edge.aten.flip.default,  # REVERSE
+        exir_ops.edge.aten.grid_sampler_2d,  # GATHER; NOTE(review): no overload named - confirm it matches node.target
+        exir_ops.edge.aten.scatter.src,
+        exir_ops.edge.aten.scatter.value,
+        exir_ops.edge.aten.select_scatter.default,
+        exir_ops.edge.aten.scatter_reduce.two,
+        exir_ops.edge.aten.scatter_add.default,
+        exir_ops.edge.aten.upsample_nearest2d.vec,  # RESIZE
+        exir_ops.edge.aten.upsample_bilinear2d.vec,  # RESIZE
+        exir_ops.edge.aten.reflection_pad1d.default,  # REVERSE
+        exir_ops.edge.aten.reflection_pad2d.default,  # REVERSE
+        exir_ops.edge.aten.reflection_pad3d.default,  # REVERSE
+    ]
+
+    def __init__(self, reporter: WhyNoPartitionReporter):
+        super().__init__()
+        self.reporter = reporter
+
+    def is_node_supported(
+        self, submodules: typing.Mapping[str, torch.nn.Module], node: fx.Node
+    ) -> bool:
+        """Report and reject node if its target is in `unsupported_ops`; otherwise accept it."""
+        if node.target in self.unsupported_ops:
+            self.reporter.report_reject(node, "Op is not supported on U55.")
+            return False
+        return True
diff --git a/backends/arm/operator_support/tosa_supported_operators.py b/backends/arm/operator_support/tosa_supported_operators.py
@@ -18,6 +18,10 @@
 from executorch.backends.arm._passes.fuse_quantized_activation_pass import (
     FuseQuantizedActivationPass,
 )
+from executorch.backends.arm.operator_support.ethos_u55_support import (
+    EthosU55DtypeSupport,
+    EthosU55NotSupported,
+)
 from executorch.backends.arm.tosa_specification import Tosa_0_80, TosaSpecification
 from executorch.exir import ExportedProgram
 from executorch.exir.backend.utils import WhyNoPartitionReporter
@@ -118,6 +122,7 @@ def tosa_support_factory(
         negative_checks.append(CheckProperQuantization(reporter))
     if isinstance(tosa_spec, Tosa_0_80) and tosa_spec.is_U55_subset:
         negative_checks.append(EthosU55NotSupported(reporter))
+        negative_checks.append(EthosU55DtypeSupport(reporter))
 
     return chain(
         reporter.wrap_check(
@@ -216,61 +221,6 @@ def is_node_supported(
         return supported
 
 
-class EthosU55NotSupported(OperatorSupportBase):
-    """
-    Certain operators are not supported on U55. These are listed in `unsupported_ops`.
-    The comment mentions the unsupported TOSA operator that the aten operator maps to where it is not obvious.
-    For unimplemented operators, this is the anticipated mapping, and it might be incorrect.
-    """
-
-    unsupported_ops = [
-        exir_ops.edge.aten.any.default,  # REDUCE_ANY
-        exir_ops.edge.aten.any.dim,  # REDUCE_ANY
-        exir_ops.edge.aten.any.dims,  # REDUCE_ANY
-        exir_ops.edge.aten.bitwise_and.Tensor,
-        exir_ops.edge.aten.bitwise_or.Tensor,
-        exir_ops.edge.aten.bitwise_xor.Tensor,
-        exir_ops.edge.aten.bitwise_not,
-        exir_ops.edge.aten.logical_and.default,
-        exir_ops.edge.aten.logical_or.default,
-        exir_ops.edge.aten.logical_xor.default,
-        exir_ops.edge.aten.logical_not.default,
-        exir_ops.edge.aten.amax.default,  # REDUCE_MAX
-        exir_ops.edge.aten.amin.default,  # REDUCE_MIN
-        exir_ops.edge.aten.eq.Tensor,
-        exir_ops.edge.aten.eq.Scalar,
-        exir_ops.edge.aten.ge.Tensor,
-        exir_ops.edge.aten.gt.Tensor,
-        exir_ops.edge.aten.le.Tensor,
-        exir_ops.edge.aten.lt.Tensor,
-        exir_ops.edge.aten.flip.default,  # REVERSE
-        exir_ops.edge.aten.grid_sampler_2d,  # GATHER
-        exir_ops.edge.aten.scatter.src,
-        exir_ops.edge.aten.scatter.value,
-        exir_ops.edge.aten.select_scatter.default,
-        exir_ops.edge.aten.scatter_reduce.two,
-        exir_ops.edge.aten.scatter_add.default,
-        exir_ops.edge.aten.upsample_nearest2d.vec,  # RESIZE
-        exir_ops.edge.aten.upsample_bilinear2d.vec,  # RESIZE
-        exir_ops.edge.aten.reflection_pad1d.default,  # REVERSE
-        exir_ops.edge.aten.reflection_pad2d.default,  # REVERSE
-        exir_ops.edge.aten.reflection_pad3d.default,  # REVERSE
-    ]
-
-    def __init__(self, reporter: WhyNoPartitionReporter):
-        self.reporter = reporter
-
-    def is_node_supported(
-        self, submodules: typing.Mapping[str, torch.nn.Module], node: fx.Node
-    ) -> bool:
-
-        if node.target in self.unsupported_ops:
-            self.reporter.report_reject(node, "Op is not supported on U55.")
-            return False
-
-        return True
-
-
 class NeedsDecompositionCheck(OperatorSupportBase):
     """
     Targeted operators need to be decomposed prior to quantization in order to get a pair of q-dq-nodes surrounding
diff --git a/backends/arm/test/ops/test_sigmoid_16bit.py b/backends/arm/test/ops/test_sigmoid_16bit.py
@@ -13,8 +13,8 @@
 from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.test_pipeline import (
-    EthosU55PipelineBI,
     EthosU85PipelineBI,
+    OpNotSupportedPipeline,
     TosaPipelineBI,
 )
 from executorch.backends.xnnpack.test.tester import Quantize
@@ -109,22 +109,10 @@ def test_sigmoid_add_sigmoid_tosa_BI(test_data):
 @common.parametrize(
     "test_data",
     test_data_suite,
-    xfails={
-        "ones": "AssertionError: Output 0 does not match reference output. MLBEDSW-9770",
-        "rand": "AssertionError: Output 0 does not match reference output. MLBEDSW-9770",
-        "rand_4d": "AssertionError: Output 0 does not match reference output. MLBEDSW-9770",
-        "randn_pos": "AssertionError: Output 0 does not match reference output. MLBEDSW-9770",
-        "randn_neg": "AssertionError: Output 0 does not match reference output. MLBEDSW-9770",
-        "ramp": "AssertionError: Output 0 does not match reference output. MLBEDSW-9770",
-    },
-    # int16 tables are not supported, but some tests happen to pass regardless.
-    # Set them to xfail but strict=False -> ok if they pass.
-    strict=False,
 )
-@common.XfailIfNoCorstone300
 def test_sigmoid_tosa_u55(test_data):
-    pipeline = EthosU55PipelineBI(
-        Sigmoid(), (test_data(),), Sigmoid.aten_op, Sigmoid.exir_op, run_on_fvp=True
+    pipeline = OpNotSupportedPipeline(
+        Sigmoid(), (test_data(),), "TOSA-0.80+BI+u55", {Sigmoid.exir_op: 1}
     )
     pipeline.change_args("quantize", get_16bit_sigmoid_quantizer("TOSA-0.80+BI+u55"))
     pipeline.run()
@@ -133,26 +121,14 @@ def test_sigmoid_tosa_u55(test_data):
 @common.parametrize(
     "test_data",
     test_data_suite,
-    xfails={
-        "ones": "AssertionError: Output 0 does not match reference output. MLBEDSW-9770",
-        "rand": "AssertionError: Output 0 does not match reference output. MLBEDSW-9770",
-        "rand_4d": "AssertionError: Output 0 does not match reference output. MLBEDSW-9770",
-        "randn_neg": "AssertionError: Output 0 does not match reference output. MLBEDSW-9770",
-        "randn_pos": "AssertionError: Output 0 does not match reference output. MLBEDSW-9770",
-        "ramp": "AsssertionError: Output 0 does not match reference output. MLBEDSW-9770",
-    },
-    # int16 tables are not supported, but some tests happen to pass regardless.
-    # Set them to xfail but strict=False -> ok if they pass.
-    strict=False,
 )
-@common.XfailIfNoCorstone300
 def test_sigmoid_add_sigmoid_tosa_u55(test_data):
-    pipeline = EthosU55PipelineBI(
+    pipeline = OpNotSupportedPipeline(
         SigmoidAddSigmoid(),
         (test_data(),),
-        Sigmoid.aten_op,
-        Sigmoid.exir_op,
-        run_on_fvp=True,
+        "TOSA-0.80+BI+u55",
+        {Sigmoid.exir_op: 3},
+        n_expected_delegates=1,
     )
     pipeline.change_args("quantize", get_16bit_sigmoid_quantizer("TOSA-0.80+BI+u55"))
     pipeline.run()
diff --git a/backends/arm/test/ops/test_sigmoid_32bit.py b/backends/arm/test/ops/test_sigmoid_32bit.py