# Copyright 2025 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import logging
from copy import copy
from typing import cast

import torch
from executorch.backends.arm._passes.arm_pass_utils import create_node
from executorch.backends.arm.tosa_quant_utils import dq_op, q_op, QuantArgs
from executorch.exir.pass_base import ExportPass, PassResult
from torch import Tensor
from torch.fx import GraphModule, Node
from torch.library import custom_op, register_fake

logger = logging.getLogger(__name__)


@custom_op("tosa::_rescale", mutates_args=())  # type: ignore[misc]
def rescale(
    x: Tensor, dtype: torch.dtype, scale: float, in_zp: int, out_zp: int
) -> Tensor:
    logger.warning(
        "Ran default implementation of tosa::_rescale. "
        "This op is meant to always be inserted inside a partition, and a correct "
        "default implementation is not provided."
    )
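    # Note: a faithful TOSA RESCALE would compute roughly
    # round((x - in_zp) * scale) + out_zp, saturated to `dtype`;
    # this fallback only changes the dtype.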
    # Clone is needed to avoid returning a reference when rescaling to the
    # same dtype. This is a necessary requirement for non-mutating custom ops.
    return x.to(dtype=dtype).clone()


@register_fake("tosa::_rescale")  # type: ignore[misc]
def rescale_fake(
    x: Tensor, dtype: torch.dtype, scale: float, in_zp: int, out_zp: int
) -> Tensor:
    """Casts the input tensor to `dtype` to produce the correct tensor meta for a
    _rescale op. Additionally validates the TOSA constraints of a RESCALE op.
    """
    if dtype not in (torch.int32, torch.int8):
        raise NotImplementedError(
            "tosa::_rescale currently only supports int32 and int8."
        )
    if dtype == torch.int32 and out_zp != 0:
        raise ValueError(
            "TOSA requires output_zp to be zero when the output dtype is int32."
        )
    if x.dtype == torch.int32 and in_zp != 0:
        raise ValueError(
            "TOSA requires input_zp to be zero when the input dtype is int32."
        )
    if x.dtype == torch.int8 and not -128 <= in_zp <= 127:
        raise ValueError(f"{in_zp=} outside valid range [-128, 127] for int8.")
    if dtype == torch.int8 and not -128 <= out_zp <= 127:
        raise ValueError(f"{out_zp=} outside valid range [-128, 127] for int8.")

    return x.to(dtype=dtype).clone()


class InsertRescalePass(ExportPass):
    """Finds dq -> q patterns and replaces them with tosa::_rescale ops.

    Does not guarantee that the dtypes and zero points are valid
    in TOSA; that is the job of the quantization annotator that
    produced the dq and q nodes. The TOSA constraints are validated
    in the fake implementation of tosa::_rescale.
    """

    def fold_dq_q_to_rescale(self, node: Node, user: Node, graph_module: GraphModule):
        dq_args = QuantArgs.from_operator(node.target, node.args)
        q_args = QuantArgs.from_operator(user.target, user.args)
        new_scale = dq_args.scale / q_args.scale

        with graph_module.graph.inserting_before(node):
            rescale_node = create_node(
                graph_module.graph,
                torch.ops.tosa._rescale.default,
                (
                    node.all_input_nodes[0],
                    q_args.dtype,
                    new_scale,
                    dq_args.zp,
                    q_args.zp,
                ),
            )
            rescale_node.meta = copy(user.meta)
            user.replace_all_uses_with(rescale_node)
            graph_module.graph.erase_node(user)

    def call(self, graph_module: GraphModule) -> PassResult:
        modified = False
        for node in graph_module.graph.nodes:
            node = cast(Node, node)

            if node.target is not dq_op:
                continue
            # Copy the users since folding removes them while iterating,
            # modifying node.users.
            for user in copy(node.users):
                if user.target is q_op:
                    self.fold_dq_q_to_rescale(node, user, graph_module)
                    modified = True
            if len(node.users) == 0:
                graph_module.graph.erase_node(node)

        graph_module = super().call(graph_module).graph_module
        graph_module.recompile()
        return PassResult(graph_module, modified)
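

# Usage sketch (hypothetical; within ExecuTorch this pass normally runs as part
# of the Arm backend's pass pipeline rather than being invoked directly).
# ExportPass instances are callable on a GraphModule and return a PassResult:
#
#   result = InsertRescalePass()(exported_program.graph_module)
#   graph_module = result.graph_module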