diff --git a/backends/qualcomm/CMakeLists.txt b/backends/qualcomm/CMakeLists.txt index 8e800d8bef2..203cdf2c314 100644 --- a/backends/qualcomm/CMakeLists.txt +++ b/backends/qualcomm/CMakeLists.txt @@ -130,6 +130,7 @@ add_library(qnn_implementation STATIC) add_library(qnn_logger STATIC) add_library(qnn_manager STATIC) add_library(qnn_mem_manager STATIC) +add_library(qnn_op_package_manager STATIC) add_library(qnn_profiler STATIC) add_library(qnn_schema INTERFACE ${_qnn_schema__outputs}) add_library(qnn_sys_function_interface INTERFACE) @@ -152,7 +153,7 @@ target_link_libraries( target_link_libraries(qnn_executorch_logging PRIVATE qnn_schema) target_link_libraries(qnn_profiler PRIVATE qnn_executorch_logging) target_link_libraries(qnn_logger PRIVATE qnn_implementation ${android_log}) -target_link_libraries(qnn_backend PRIVATE qnn_implementation qnn_logger) +target_link_libraries(qnn_backend PRIVATE qnn_implementation qnn_logger qnn_op_package_manager) target_link_libraries(qnn_custom_protocol PRIVATE qnn_logger) target_link_libraries( qnn_device PRIVATE qnn_executorch_logging qnn_implementation qnn_logger diff --git a/backends/qualcomm/builders/README.md b/backends/qualcomm/builders/README.md index 22f0852941c..77944a8bfc2 100644 --- a/backends/qualcomm/builders/README.md +++ b/backends/qualcomm/builders/README.md @@ -176,7 +176,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_DATA # op builder will inherit NodeVisitor and have its own implementation # register_node_visitor for book-keeping the dictionary of target name v.s. 
callback -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor # the definitions required to build operator in QNN from .qnn_constants import OpLayerNorm, QNN_OP_PACKAGE_NAME_QTI_AISW # utility to get parameter value when creating tensor in QNN diff --git a/backends/qualcomm/builders/node_visitor.py b/backends/qualcomm/builders/node_visitor.py index 2f5064f258e..37fe3615268 100644 --- a/backends/qualcomm/builders/node_visitor.py +++ b/backends/qualcomm/builders/node_visitor.py @@ -63,7 +63,9 @@ torch.int64: PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_INT_64, torch.uint8: PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UINT_8, torch.uint16: PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UINT_16, + torch.uint32: PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UINT_32, float: PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_FLOAT_32, + int: PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UINT_32, } PER_CHANNEL_ENCODING = { @@ -470,51 +472,3 @@ def define_node( ) -> PyQnnWrapper.PyQnnOpWrapper: """Convert torch.fx.Node to OpWrapper""" raise NotImplementedError("NodeVisitor must be extended!") - - -# This will hold mapping of all node names to the visitor class -_node_visitor_dict = {} - - -def register_node_visitor(visitor): - """Register node visitor into _node_visitor_dict""" - assert ( - isinstance(visitor, type) - and issubclass(visitor, NodeVisitor) - and hasattr(visitor, "target") - ), f"Illformed NodeVisitor subclass, can't register!, got: {visitor}" - for target in visitor.target: - _node_visitor_dict[target] = visitor - - -def generate_node_to_external_map( - edge_program: torch.export.ExportedProgram, -) -> Dict[torch.fx.Node, int]: - node_to_external_map = {} - for node in edge_program.graph_module.graph.nodes: - # The order in which we visit the placeholder node is same as the *args - # order for the forward(*args) signature for this gm. 
Using the order of - # the nodes as external_id to extract the right arg from *args at runtime - if is_graph_input(node, edge_program): - node_to_external_map[node] = len(node_to_external_map) - for node in edge_program.graph_module.graph.nodes: - if is_graph_output(node): - node_to_external_map[node] = len(node_to_external_map) - return node_to_external_map - - -def get_node_visitors( - edge_program: torch.export.ExportedProgram, - enable_tensor_dump=False, -) -> Dict[str, NodeVisitor]: - """Create a new class instance at runtime, and put them in a dict""" - node_to_external_map = generate_node_to_external_map(edge_program) - node_visitors = {} - for target, visitor in _node_visitor_dict.items(): - assert callable( - visitor - ), f"Expeting a callable class, but got {visitor} of type {type(visitor)}" - node_visitors[target] = visitor( - node_to_external_map, edge_program, enable_tensor_dump - ) - return node_visitors diff --git a/backends/qualcomm/builders/node_visitor_manager.py b/backends/qualcomm/builders/node_visitor_manager.py new file mode 100644 index 00000000000..fa9d51db1ad --- /dev/null +++ b/backends/qualcomm/builders/node_visitor_manager.py @@ -0,0 +1,77 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +from typing import Dict, List + +import torch +from executorch.backends.qualcomm.serialization.qc_schema import ( + QnnExecuTorchOpPackageInfo, +) + +from .node_visitor import NodeVisitor +from .op_custom_op import CustomOp +from .utils import is_graph_input, is_graph_output + + +# This will hold mapping of all node names to the visitor class +_node_visitor_dict = {} + + +def register_node_visitor(visitor): + """Register node visitor into _node_visitor_dict""" + assert ( + isinstance(visitor, type) + and issubclass(visitor, NodeVisitor) + and hasattr(visitor, "target") + ), f"Illformed NodeVisitor subclass, can't register!, got: {visitor}" + for target in visitor.target: + _node_visitor_dict[target] = visitor + + +def generate_node_to_external_map( + edge_program: torch.export.ExportedProgram, +) -> Dict[torch.fx.Node, int]: + node_to_external_map = {} + for node in edge_program.graph_module.graph.nodes: + # The order in which we visit the placeholder node is same as the *args + # order for the forward(*args) signature for this gm. 
Using the order of + # the nodes as external_id to extract the right arg from *args at runtime + if is_graph_input(node, edge_program): + node_to_external_map[node] = len(node_to_external_map) + for node in edge_program.graph_module.graph.nodes: + if is_graph_output(node): + node_to_external_map[node] = len(node_to_external_map) + return node_to_external_map + + +def get_node_visitors( + edge_program: torch.export.ExportedProgram, + enable_tensor_dump=False, + op_package_infos: List[QnnExecuTorchOpPackageInfo] = None, +) -> Dict[str, NodeVisitor]: + """Create a new class instance at runtime, and put them in a dict""" + node_to_external_map = generate_node_to_external_map(edge_program) + node_visitors = {} + for target, visitor in _node_visitor_dict.items(): + assert callable( + visitor + ), f"Expecting a callable class, but got {visitor} of type {type(visitor)}" + node_visitors[target] = visitor( + node_to_external_map, edge_program, enable_tensor_dump + ) + if op_package_infos: + custom_ops = [] + for op_package_info in op_package_infos: + if op_package_info.custom_op_name not in custom_ops: + custom_op_builder = CustomOp( + op_package_info, + node_to_external_map, + edge_program, + enable_tensor_dump, + ) + node_visitors[op_package_info.custom_op_name] = custom_op_builder + custom_ops.append(op_package_info.custom_op_name) + return node_visitors diff --git a/backends/qualcomm/builders/op_abs.py b/backends/qualcomm/builders/op_abs.py index 2209ffc792c..1df49b88912 100644 --- a/backends/qualcomm/builders/op_abs.py +++ b/backends/qualcomm/builders/op_abs.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseAbs, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_adaptive_avg_pool2d.py b/backends/qualcomm/builders/op_adaptive_avg_pool2d.py index 777e1f61ada..1b0d58482ec 
100644 --- a/backends/qualcomm/builders/op_adaptive_avg_pool2d.py +++ b/backends/qualcomm/builders/op_adaptive_avg_pool2d.py @@ -11,7 +11,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpPoolAvg2d, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_add.py b/backends/qualcomm/builders/op_add.py index f8fb31fb725..d2f4a39fc3d 100644 --- a/backends/qualcomm/builders/op_add.py +++ b/backends/qualcomm/builders/op_add.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseAdd, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_amax.py b/backends/qualcomm/builders/op_amax.py index 62c17b8dfcd..051355a8b6b 100644 --- a/backends/qualcomm/builders/op_amax.py +++ b/backends/qualcomm/builders/op_amax.py @@ -12,7 +12,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_AXIS_ORDER, QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpReduceMax, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_and.py b/backends/qualcomm/builders/op_and.py index 22b63e0d6ff..9e43b4df5b2 100644 --- a/backends/qualcomm/builders/op_and.py +++ b/backends/qualcomm/builders/op_and.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseAnd, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_arange.py b/backends/qualcomm/builders/op_arange.py index 
210ab85e506..e8c4c7d5267 100644 --- a/backends/qualcomm/builders/op_arange.py +++ b/backends/qualcomm/builders/op_arange.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor @register_node_visitor diff --git a/backends/qualcomm/builders/op_argmin.py b/backends/qualcomm/builders/op_argmin.py index 0717e6489fd..a9fa2021bb0 100644 --- a/backends/qualcomm/builders/op_argmin.py +++ b/backends/qualcomm/builders/op_argmin.py @@ -10,7 +10,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_AXIS_ORDER, QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpArgmin, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_avg_pool2d.py b/backends/qualcomm/builders/op_avg_pool2d.py index 6892e7326f6..6e0f70474ea 100644 --- a/backends/qualcomm/builders/op_avg_pool2d.py +++ b/backends/qualcomm/builders/op_avg_pool2d.py @@ -12,7 +12,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpPoolAvg2d, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_batch_norm.py b/backends/qualcomm/builders/op_batch_norm.py index ec0a7c39348..48c5d5d1b51 100644 --- a/backends/qualcomm/builders/op_batch_norm.py +++ b/backends/qualcomm/builders/op_batch_norm.py @@ -18,7 +18,8 @@ ) from executorch.exir.dialects._ops import ops as exir_ops -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpBatchnorm, 
QNN_OP_PACKAGE_NAME_QTI_AISW from .utils import get_parameter diff --git a/backends/qualcomm/builders/op_bmm.py b/backends/qualcomm/builders/op_bmm.py index d473d085490..92c8f1dde3e 100644 --- a/backends/qualcomm/builders/op_bmm.py +++ b/backends/qualcomm/builders/op_bmm.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpMatMul, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_cat.py b/backends/qualcomm/builders/op_cat.py index 09f99396589..9f6eb6676cf 100644 --- a/backends/qualcomm/builders/op_cat.py +++ b/backends/qualcomm/builders/op_cat.py @@ -12,7 +12,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_AXIS_ORDER, QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpConcat, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_ceil.py b/backends/qualcomm/builders/op_ceil.py index f0a43846d11..6b85592165c 100644 --- a/backends/qualcomm/builders/op_ceil.py +++ b/backends/qualcomm/builders/op_ceil.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseCeil, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_clamp.py b/backends/qualcomm/builders/op_clamp.py index e80c99db352..1e13b70f78e 100644 --- a/backends/qualcomm/builders/op_clamp.py +++ b/backends/qualcomm/builders/op_clamp.py @@ -11,7 +11,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from 
.node_visitor_manager import register_node_visitor from .qnn_constants import OpReluMinMax, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_conv2d.py b/backends/qualcomm/builders/op_conv2d.py index 5a168ca103a..0456cd53524 100644 --- a/backends/qualcomm/builders/op_conv2d.py +++ b/backends/qualcomm/builders/op_conv2d.py @@ -12,7 +12,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import ( OpConv2d, OpDepthWiseConv2d, diff --git a/backends/qualcomm/builders/op_cos.py b/backends/qualcomm/builders/op_cos.py index 69c0d40a026..9ff11d86dda 100644 --- a/backends/qualcomm/builders/op_cos.py +++ b/backends/qualcomm/builders/op_cos.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseCos, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_cum_sum.py b/backends/qualcomm/builders/op_cum_sum.py index dceaea83345..01f93d12664 100644 --- a/backends/qualcomm/builders/op_cum_sum.py +++ b/backends/qualcomm/builders/op_cum_sum.py @@ -11,7 +11,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_AXIS_ORDER, QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpCumulativeSum, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_custom_op.py b/backends/qualcomm/builders/op_custom_op.py new file mode 100644 index 00000000000..52a15ef95f2 --- /dev/null +++ b/backends/qualcomm/builders/op_custom_op.py @@ -0,0 +1,93 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. 
+# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +import warnings +from typing import Dict, Iterable + +import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper + +import numpy as np +import torch + +from executorch.backends.qualcomm.serialization.qc_schema import ( + QnnExecuTorchOpPackageInfo, +) + +from executorch.backends.qualcomm.utils.constants import QCOM_DATA + +from .node_visitor import NodeVisitor, QNN_TENSOR_TYPE_MAP + + +class CustomOp(NodeVisitor): + target = "" + op_package_info = QnnExecuTorchOpPackageInfo() + + def __init__(self, op_package_info: QnnExecuTorchOpPackageInfo, *args) -> None: + super().__init__(*args) + self.target = op_package_info.custom_op_name + self.op_package_info = op_package_info + + def define_node( + self, + node: torch.fx.Node, + nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper], + ) -> PyQnnWrapper.PyQnnOpWrapper: + custom_op = PyQnnWrapper.PyQnnOpWrapper( + node.name, + self.op_package_info.op_package_name, + self.op_package_info.qnn_op_type_name, + ) + + custom_input_tensors = [] + custom_attr_keys = [arg.name for arg in node.target._schema.arguments] + if len(custom_attr_keys) != len(node.args): + warnings.warn( + f"Number of inputs ({len(node.args)}) mismatch the number of args ({len(custom_attr_keys)}) in schema for the custom node ({self.target}).", + stacklevel=1, + ) + return + for arg, arg_name in zip(node.args, custom_attr_keys): + if arg is None: + continue + if isinstance(arg, torch.fx.Node): + input_tensor = self.get_tensor(arg, node) + input_tensor_wrapper = self.define_tensor( + arg, + node, + input_tensor, + PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE, + nodes_to_wrappers, + ) + custom_input_tensors.append(input_tensor_wrapper) + elif isinstance(arg, Iterable): + tensor_parm_shape = [len(arg)] + custom_op.AddTensorParam( + arg_name, + 
QNN_TENSOR_TYPE_MAP[type(arg[0])], + len(tensor_parm_shape), + tensor_parm_shape, + np.array(arg), + True, + ) + else: + custom_op.AddScalarParam( + arg_name, + QNN_TENSOR_TYPE_MAP[type(arg)], + {QCOM_DATA: arg}, + ) + + output_tensor = self.get_tensor(node, node) + output_tensor_wrapper = self.define_tensor( + node, + node, + output_tensor, + PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE, + nodes_to_wrappers, + ) + custom_output_tensors = [output_tensor_wrapper] + + custom_op.AddInputTensors(custom_input_tensors) + custom_op.AddOutputTensors(custom_output_tensors) + return custom_op diff --git a/backends/qualcomm/builders/op_depth_to_space.py b/backends/qualcomm/builders/op_depth_to_space.py index 357b7a81039..908e0949162 100644 --- a/backends/qualcomm/builders/op_depth_to_space.py +++ b/backends/qualcomm/builders/op_depth_to_space.py @@ -12,7 +12,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpDepthToSpace, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_dequantize.py b/backends/qualcomm/builders/op_dequantize.py index 722cdbe7957..c4d9b8c29a4 100644 --- a/backends/qualcomm/builders/op_dequantize.py +++ b/backends/qualcomm/builders/op_dequantize.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpDequantize, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_div.py b/backends/qualcomm/builders/op_div.py index 399e914e290..9fc4a9302b0 100644 --- a/backends/qualcomm/builders/op_div.py +++ b/backends/qualcomm/builders/op_div.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from 
.node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseDivide, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_elu.py b/backends/qualcomm/builders/op_elu.py index f0ac422f4b8..65e8d93f414 100644 --- a/backends/qualcomm/builders/op_elu.py +++ b/backends/qualcomm/builders/op_elu.py @@ -11,7 +11,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElu, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_embedding.py b/backends/qualcomm/builders/op_embedding.py index ba5b1a02077..45adc20fa79 100644 --- a/backends/qualcomm/builders/op_embedding.py +++ b/backends/qualcomm/builders/op_embedding.py @@ -11,7 +11,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpGather, QNN_OP_PACKAGE_NAME_QTI_AISW from .utils import get_parameter diff --git a/backends/qualcomm/builders/op_eq.py b/backends/qualcomm/builders/op_eq.py index 6f33ea78bd1..fcf3213d3a9 100644 --- a/backends/qualcomm/builders/op_eq.py +++ b/backends/qualcomm/builders/op_eq.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseEqual, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_exp.py b/backends/qualcomm/builders/op_exp.py index f736dec85c2..9a80e7fb4f4 100644 --- a/backends/qualcomm/builders/op_exp.py +++ b/backends/qualcomm/builders/op_exp.py @@ -9,7 +9,8 @@ import 
torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseExp, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_expand.py b/backends/qualcomm/builders/op_expand.py index 31d248638ab..01a8da42752 100644 --- a/backends/qualcomm/builders/op_expand.py +++ b/backends/qualcomm/builders/op_expand.py @@ -11,7 +11,8 @@ import numpy as np import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpTile, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_full.py b/backends/qualcomm/builders/op_full.py index 4d9d8318fce..d58efd77791 100644 --- a/backends/qualcomm/builders/op_full.py +++ b/backends/qualcomm/builders/op_full.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor @register_node_visitor diff --git a/backends/qualcomm/builders/op_full_like.py b/backends/qualcomm/builders/op_full_like.py index 2ffdf0c63a5..69609d887aa 100644 --- a/backends/qualcomm/builders/op_full_like.py +++ b/backends/qualcomm/builders/op_full_like.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor @register_node_visitor diff --git a/backends/qualcomm/builders/op_gather.py b/backends/qualcomm/builders/op_gather.py index 7eb8caed57f..140d2a79caf 100644 --- a/backends/qualcomm/builders/op_gather.py +++ b/backends/qualcomm/builders/op_gather.py @@ -11,7 +11,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from 
.node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpGatherElements, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_ge.py b/backends/qualcomm/builders/op_ge.py index 28a29829731..6c5671ff5f2 100644 --- a/backends/qualcomm/builders/op_ge.py +++ b/backends/qualcomm/builders/op_ge.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseGreaterEqual, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_gelu.py b/backends/qualcomm/builders/op_gelu.py index 02356a2eef5..3d111f0cf98 100644 --- a/backends/qualcomm/builders/op_gelu.py +++ b/backends/qualcomm/builders/op_gelu.py @@ -10,7 +10,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpGelu, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_group_norm.py b/backends/qualcomm/builders/op_group_norm.py index a52569cfa7a..c492616d999 100644 --- a/backends/qualcomm/builders/op_group_norm.py +++ b/backends/qualcomm/builders/op_group_norm.py @@ -12,7 +12,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpGroupNorm, QNN_OP_PACKAGE_NAME_QTI_AISW from .utils import get_parameter diff --git a/backends/qualcomm/builders/op_gt.py b/backends/qualcomm/builders/op_gt.py index 8c1ef3a600c..e296589af5a 100644 --- a/backends/qualcomm/builders/op_gt.py +++ b/backends/qualcomm/builders/op_gt.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, 
register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseGreater, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_hardsigmoid.py b/backends/qualcomm/builders/op_hardsigmoid.py index c30cae92f55..70ac35828d8 100644 --- a/backends/qualcomm/builders/op_hardsigmoid.py +++ b/backends/qualcomm/builders/op_hardsigmoid.py @@ -12,7 +12,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseNeuron, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_hardswish.py b/backends/qualcomm/builders/op_hardswish.py index fb4d0a40515..8a8fa25847d 100644 --- a/backends/qualcomm/builders/op_hardswish.py +++ b/backends/qualcomm/builders/op_hardswish.py @@ -10,7 +10,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpHardSwish, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_hardtanh.py b/backends/qualcomm/builders/op_hardtanh.py index 4025a060ff3..755e45f0e3b 100644 --- a/backends/qualcomm/builders/op_hardtanh.py +++ b/backends/qualcomm/builders/op_hardtanh.py @@ -12,7 +12,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpReluMinMax, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_index.py b/backends/qualcomm/builders/op_index.py index fe6bf4262d8..2a7da815265 100644 --- 
a/backends/qualcomm/builders/op_index.py +++ b/backends/qualcomm/builders/op_index.py @@ -11,7 +11,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpGather, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_index_put.py b/backends/qualcomm/builders/op_index_put.py index beb3338a62a..a58075bf06c 100644 --- a/backends/qualcomm/builders/op_index_put.py +++ b/backends/qualcomm/builders/op_index_put.py @@ -4,7 +4,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpScatterNd, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_instance_norm.py b/backends/qualcomm/builders/op_instance_norm.py index 828e89a97f2..08c4730ce1d 100644 --- a/backends/qualcomm/builders/op_instance_norm.py +++ b/backends/qualcomm/builders/op_instance_norm.py @@ -18,7 +18,8 @@ ) from executorch.exir.dialects._ops import ops as exir_ops -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpInstanceNorm, QNN_OP_PACKAGE_NAME_QTI_AISW from .utils import get_parameter diff --git a/backends/qualcomm/builders/op_layer_norm.py b/backends/qualcomm/builders/op_layer_norm.py index 5316cb1dabe..7c17980a82e 100644 --- a/backends/qualcomm/builders/op_layer_norm.py +++ b/backends/qualcomm/builders/op_layer_norm.py @@ -13,7 +13,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from 
.qnn_constants import OpLayerNorm, QNN_OP_PACKAGE_NAME_QTI_AISW from .utils import get_parameter diff --git a/backends/qualcomm/builders/op_le.py b/backends/qualcomm/builders/op_le.py index e5784049c5c..ad6a78b3da8 100644 --- a/backends/qualcomm/builders/op_le.py +++ b/backends/qualcomm/builders/op_le.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseLessEqual, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_linear.py b/backends/qualcomm/builders/op_linear.py index 71716e81bca..a73633ac229 100644 --- a/backends/qualcomm/builders/op_linear.py +++ b/backends/qualcomm/builders/op_linear.py @@ -16,7 +16,8 @@ QCOM_ZERO_POINTS, ) -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpFullyConnected, QNN_OP_PACKAGE_NAME_QTI_AISW from .utils import get_parameter diff --git a/backends/qualcomm/builders/op_log.py b/backends/qualcomm/builders/op_log.py index 65125e42316..397e2072489 100644 --- a/backends/qualcomm/builders/op_log.py +++ b/backends/qualcomm/builders/op_log.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseLog, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_log_softmax.py b/backends/qualcomm/builders/op_log_softmax.py index a9ee2ce2d55..947140006a3 100644 --- a/backends/qualcomm/builders/op_log_softmax.py +++ b/backends/qualcomm/builders/op_log_softmax.py @@ -11,7 +11,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_AXIS_ORDER, QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor 
+from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpLogSoftmax, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_logical_not.py b/backends/qualcomm/builders/op_logical_not.py index 1eed7d894de..4e8fc8543a7 100644 --- a/backends/qualcomm/builders/op_logical_not.py +++ b/backends/qualcomm/builders/op_logical_not.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseNot, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_lt.py b/backends/qualcomm/builders/op_lt.py index 9494aac9d29..2558a97dfab 100644 --- a/backends/qualcomm/builders/op_lt.py +++ b/backends/qualcomm/builders/op_lt.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseLess, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_matmul.py b/backends/qualcomm/builders/op_matmul.py index 8d45424bd62..5a1e366f384 100644 --- a/backends/qualcomm/builders/op_matmul.py +++ b/backends/qualcomm/builders/op_matmul.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpMatMul, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_max.py b/backends/qualcomm/builders/op_max.py index 57e119922ed..8406973ab5a 100644 --- a/backends/qualcomm/builders/op_max.py +++ b/backends/qualcomm/builders/op_max.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import 
register_node_visitor from .qnn_constants import OpElementWiseMaximum, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_max_pool2d.py b/backends/qualcomm/builders/op_max_pool2d.py index a0ef685acd0..5da1bd1ac0f 100644 --- a/backends/qualcomm/builders/op_max_pool2d.py +++ b/backends/qualcomm/builders/op_max_pool2d.py @@ -12,7 +12,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpPoolMax2d, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_mean_dim.py b/backends/qualcomm/builders/op_mean_dim.py index 8fb0e9e3c95..630b1b0b8de 100644 --- a/backends/qualcomm/builders/op_mean_dim.py +++ b/backends/qualcomm/builders/op_mean_dim.py @@ -12,7 +12,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_AXIS_ORDER, QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpReduceMean, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_min.py b/backends/qualcomm/builders/op_min.py index 72224500b0e..28c766cffb5 100644 --- a/backends/qualcomm/builders/op_min.py +++ b/backends/qualcomm/builders/op_min.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseMinimum, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_mul.py b/backends/qualcomm/builders/op_mul.py index 36e0c91cf7a..f003007e0df 100644 --- a/backends/qualcomm/builders/op_mul.py +++ b/backends/qualcomm/builders/op_mul.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import 
NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseMultiply, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_ne.py b/backends/qualcomm/builders/op_ne.py index e9b723a88c5..1c7f87d4f4f 100644 --- a/backends/qualcomm/builders/op_ne.py +++ b/backends/qualcomm/builders/op_ne.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseNotEqual, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_neg.py b/backends/qualcomm/builders/op_neg.py index fd48cbe2791..911fbe742c8 100644 --- a/backends/qualcomm/builders/op_neg.py +++ b/backends/qualcomm/builders/op_neg.py @@ -8,7 +8,8 @@ import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseNeg, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_or.py b/backends/qualcomm/builders/op_or.py index 483831db0f7..c0a995d3631 100644 --- a/backends/qualcomm/builders/op_or.py +++ b/backends/qualcomm/builders/op_or.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseOr, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_pad.py b/backends/qualcomm/builders/op_pad.py index 7b210ed6838..7832e180ebb 100644 --- a/backends/qualcomm/builders/op_pad.py +++ b/backends/qualcomm/builders/op_pad.py @@ -11,7 +11,8 @@ import torch from 
executorch.backends.qualcomm.utils.constants import QCOM_AXIS_ORDER, QCOM_DATA -from .node_visitor import NodeVisitor, QNN_TENSOR_TYPE_MAP, register_node_visitor +from .node_visitor import NodeVisitor, QNN_TENSOR_TYPE_MAP +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpPad, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_pow.py b/backends/qualcomm/builders/op_pow.py index 996d3b353e2..50568bfbcc1 100644 --- a/backends/qualcomm/builders/op_pow.py +++ b/backends/qualcomm/builders/op_pow.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWisePower, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_prelu.py b/backends/qualcomm/builders/op_prelu.py index b9ef6accb8a..5291acfbc8c 100644 --- a/backends/qualcomm/builders/op_prelu.py +++ b/backends/qualcomm/builders/op_prelu.py @@ -10,7 +10,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_AXIS_ORDER -from .node_visitor import get_parameter, NodeVisitor, register_node_visitor +from .node_visitor import get_parameter, NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpPRelu, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_quantize.py b/backends/qualcomm/builders/op_quantize.py index e10f88795bb..7d7bd3ec9ec 100644 --- a/backends/qualcomm/builders/op_quantize.py +++ b/backends/qualcomm/builders/op_quantize.py @@ -10,7 +10,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_ENCODING, QCOM_QUANT_ATTRS -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpQuantize, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git 
a/backends/qualcomm/builders/op_relu.py b/backends/qualcomm/builders/op_relu.py index d237b84efe1..94afce56113 100644 --- a/backends/qualcomm/builders/op_relu.py +++ b/backends/qualcomm/builders/op_relu.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpRelu, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_repeat.py b/backends/qualcomm/builders/op_repeat.py index e5867e64447..abd0cff73e8 100644 --- a/backends/qualcomm/builders/op_repeat.py +++ b/backends/qualcomm/builders/op_repeat.py @@ -11,7 +11,8 @@ import numpy as np import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpTile, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_reshape.py b/backends/qualcomm/builders/op_reshape.py index 6e25c65e16d..6cc7d81af33 100644 --- a/backends/qualcomm/builders/op_reshape.py +++ b/backends/qualcomm/builders/op_reshape.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpReshape, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_resize.py b/backends/qualcomm/builders/op_resize.py index d9861a6c5bb..04216ce9d2c 100644 --- a/backends/qualcomm/builders/op_resize.py +++ b/backends/qualcomm/builders/op_resize.py @@ -11,7 +11,8 @@ from executorch.backends.qualcomm.utils.constants import QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpResize, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git 
a/backends/qualcomm/builders/op_rms_norm.py b/backends/qualcomm/builders/op_rms_norm.py index fdf49b09fef..6d5060f730b 100644 --- a/backends/qualcomm/builders/op_rms_norm.py +++ b/backends/qualcomm/builders/op_rms_norm.py @@ -19,7 +19,8 @@ ) from executorch.exir.dialects._ops import ops as exir_ops -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpRmsNorm, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_rsqrt.py b/backends/qualcomm/builders/op_rsqrt.py index b1995e28dde..0f0a069441d 100644 --- a/backends/qualcomm/builders/op_rsqrt.py +++ b/backends/qualcomm/builders/op_rsqrt.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseRsqrt, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_scalar_tensor.py b/backends/qualcomm/builders/op_scalar_tensor.py index 2e9154115bc..bb6b5825803 100644 --- a/backends/qualcomm/builders/op_scalar_tensor.py +++ b/backends/qualcomm/builders/op_scalar_tensor.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor @register_node_visitor diff --git a/backends/qualcomm/builders/op_select_copy.py b/backends/qualcomm/builders/op_select_copy.py index c5a7c0f7c99..69d237c282d 100644 --- a/backends/qualcomm/builders/op_select_copy.py +++ b/backends/qualcomm/builders/op_select_copy.py @@ -12,7 +12,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import 
OpStridedSlice, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_sigmoid.py b/backends/qualcomm/builders/op_sigmoid.py index ce820c8f4ee..20f933ed128 100644 --- a/backends/qualcomm/builders/op_sigmoid.py +++ b/backends/qualcomm/builders/op_sigmoid.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpSigmoid, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_sin.py b/backends/qualcomm/builders/op_sin.py index f9a0b1c2e63..5c389ca3b20 100644 --- a/backends/qualcomm/builders/op_sin.py +++ b/backends/qualcomm/builders/op_sin.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseSin, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_skip_ops.py b/backends/qualcomm/builders/op_skip_ops.py index 0d651e80f8a..f52f69d6019 100644 --- a/backends/qualcomm/builders/op_skip_ops.py +++ b/backends/qualcomm/builders/op_skip_ops.py @@ -10,7 +10,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor class OpSkipOps(NodeVisitor): diff --git a/backends/qualcomm/builders/op_slice_copy.py b/backends/qualcomm/builders/op_slice_copy.py index 7d3a154e9f1..b2a4cc15bea 100644 --- a/backends/qualcomm/builders/op_slice_copy.py +++ b/backends/qualcomm/builders/op_slice_copy.py @@ -10,7 +10,8 @@ import numpy as np import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpStridedSlice, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git 
a/backends/qualcomm/builders/op_softmax.py b/backends/qualcomm/builders/op_softmax.py index e7a4b9d2a04..556f5701f54 100644 --- a/backends/qualcomm/builders/op_softmax.py +++ b/backends/qualcomm/builders/op_softmax.py @@ -11,7 +11,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_AXIS_ORDER, QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpSoftmax, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_space_to_depth.py b/backends/qualcomm/builders/op_space_to_depth.py index 84c79d841d8..74e31df475f 100644 --- a/backends/qualcomm/builders/op_space_to_depth.py +++ b/backends/qualcomm/builders/op_space_to_depth.py @@ -12,7 +12,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpSpaceToDepth, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_split_with_sizes.py b/backends/qualcomm/builders/op_split_with_sizes.py index b70d74aa339..fc5ba0f11fb 100644 --- a/backends/qualcomm/builders/op_split_with_sizes.py +++ b/backends/qualcomm/builders/op_split_with_sizes.py @@ -11,7 +11,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_AXIS_ORDER, QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpSplit, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_sqrt.py b/backends/qualcomm/builders/op_sqrt.py index ff5a0c086e0..b71d4d68c30 100644 --- a/backends/qualcomm/builders/op_sqrt.py +++ b/backends/qualcomm/builders/op_sqrt.py @@ -9,7 +9,8 @@ import torch -from 
.node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseSquareRoot, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_squeeze.py b/backends/qualcomm/builders/op_squeeze.py index 94d6e5a3cf9..0cb7bf142b9 100644 --- a/backends/qualcomm/builders/op_squeeze.py +++ b/backends/qualcomm/builders/op_squeeze.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpReshape, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_stack.py b/backends/qualcomm/builders/op_stack.py index 25b7d353dc4..2d8587d51cd 100644 --- a/backends/qualcomm/builders/op_stack.py +++ b/backends/qualcomm/builders/op_stack.py @@ -11,7 +11,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_AXIS_ORDER, QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpPack, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_sub.py b/backends/qualcomm/builders/op_sub.py index e7e5b22bb96..064d9b3cd42 100644 --- a/backends/qualcomm/builders/op_sub.py +++ b/backends/qualcomm/builders/op_sub.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseSubtract, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_sum_int_list.py b/backends/qualcomm/builders/op_sum_int_list.py index fc5546f9d33..af5fd1cecba 100644 --- a/backends/qualcomm/builders/op_sum_int_list.py +++ 
b/backends/qualcomm/builders/op_sum_int_list.py @@ -11,7 +11,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_AXIS_ORDER, QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpReduceSum, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_tanh.py b/backends/qualcomm/builders/op_tanh.py index c06f44b312f..c61439398e3 100644 --- a/backends/qualcomm/builders/op_tanh.py +++ b/backends/qualcomm/builders/op_tanh.py @@ -10,7 +10,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpTanh, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_to.py b/backends/qualcomm/builders/op_to.py index 688b4857946..6774b0e3af6 100644 --- a/backends/qualcomm/builders/op_to.py +++ b/backends/qualcomm/builders/op_to.py @@ -10,7 +10,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_QUANT_ATTRS -from .node_visitor import NodeVisitor, QNN_TENSOR_TYPE_MAP, register_node_visitor +from .node_visitor import NodeVisitor, QNN_TENSOR_TYPE_MAP +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpCast, OpConvert, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_topk.py b/backends/qualcomm/builders/op_topk.py index 2b5d23268b9..f310752c8f6 100644 --- a/backends/qualcomm/builders/op_topk.py +++ b/backends/qualcomm/builders/op_topk.py @@ -16,7 +16,8 @@ QCOM_QUANT_ATTRS, ) -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpTopK, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_transpose.py 
b/backends/qualcomm/builders/op_transpose.py index 7fb02a2fb7c..dbed10ced46 100644 --- a/backends/qualcomm/builders/op_transpose.py +++ b/backends/qualcomm/builders/op_transpose.py @@ -12,7 +12,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_INSERTED_PERMUTE -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpTranspose, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_unbind.py b/backends/qualcomm/builders/op_unbind.py index 1c505e6f4fd..7db8bf07596 100644 --- a/backends/qualcomm/builders/op_unbind.py +++ b/backends/qualcomm/builders/op_unbind.py @@ -11,7 +11,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_AXIS_ORDER, QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpUnpack, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_unsqueeze.py b/backends/qualcomm/builders/op_unsqueeze.py index f5cd7af3b2e..3408f3ec14f 100644 --- a/backends/qualcomm/builders/op_unsqueeze.py +++ b/backends/qualcomm/builders/op_unsqueeze.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpReshape, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_upsample_bilinear2d.py b/backends/qualcomm/builders/op_upsample_bilinear2d.py index ab8ab9b6452..7394823899e 100644 --- a/backends/qualcomm/builders/op_upsample_bilinear2d.py +++ b/backends/qualcomm/builders/op_upsample_bilinear2d.py @@ -10,7 +10,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_DATA -from .node_visitor import NodeVisitor, 
register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpResizeBilinear, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_upsample_nearest2d.py b/backends/qualcomm/builders/op_upsample_nearest2d.py index a434880e290..a338f54b91f 100644 --- a/backends/qualcomm/builders/op_upsample_nearest2d.py +++ b/backends/qualcomm/builders/op_upsample_nearest2d.py @@ -10,7 +10,8 @@ import torch from executorch.backends.qualcomm.utils.constants import QCOM_DATA -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpResizeNearestNeighbor, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/builders/op_where.py b/backends/qualcomm/builders/op_where.py index 94ee1b0e940..460431a4814 100644 --- a/backends/qualcomm/builders/op_where.py +++ b/backends/qualcomm/builders/op_where.py @@ -9,7 +9,8 @@ import torch -from .node_visitor import NodeVisitor, register_node_visitor +from .node_visitor import NodeVisitor +from .node_visitor_manager import register_node_visitor from .qnn_constants import OpElementWiseSelect, QNN_OP_PACKAGE_NAME_QTI_AISW diff --git a/backends/qualcomm/partition/qnn_partitioner.py b/backends/qualcomm/partition/qnn_partitioner.py index 7e5a779e748..776923a1493 100644 --- a/backends/qualcomm/partition/qnn_partitioner.py +++ b/backends/qualcomm/partition/qnn_partitioner.py @@ -9,9 +9,12 @@ import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManager import torch -from executorch.backends.qualcomm.builders import node_visitor +from executorch.backends.qualcomm.builders import node_visitor_manager from executorch.backends.qualcomm.builders.qnn_constants import OpContextLoader from executorch.backends.qualcomm.qnn_preprocess import QnnBackend +from 
executorch.backends.qualcomm.serialization.qc_schema_serialize import ( + flatbuffer_to_option, +) from executorch.backends.qualcomm.utils.constants import ( QCOM_AXIS_ORDER, QCOM_BYPASS_NODE, @@ -48,7 +51,12 @@ def __init__( skip_node_id_set: set = None, skip_node_op_set: set = None, ): - self.node_visitors = node_visitor.get_node_visitors(edge_program) + python_options = flatbuffer_to_option(compiler_specs[0].value) + self.node_visitors = node_visitor_manager.get_node_visitors( + edge_program, + op_package_infos=python_options.op_package_options.op_package_infos, + ) + self.skip_node_op_set = skip_node_op_set self.skip_node_id_set = skip_node_id_set self.nodes_to_wrappers = defaultdict(dict) diff --git a/backends/qualcomm/qnn_preprocess.py b/backends/qualcomm/qnn_preprocess.py index e7048f6b577..21b16a29c58 100644 --- a/backends/qualcomm/qnn_preprocess.py +++ b/backends/qualcomm/qnn_preprocess.py @@ -12,9 +12,12 @@ import torch # noqa: F401 from executorch.backends.qualcomm._passes.qnn_pass_manager import QnnPassManager -from executorch.backends.qualcomm.builders.node_visitor import get_node_visitors +from executorch.backends.qualcomm.builders.node_visitor_manager import get_node_visitors from executorch.backends.qualcomm.builders.qnn_constants import OpContextLoader from executorch.backends.qualcomm.partition.utils import generate_qnn_executorch_option +from executorch.backends.qualcomm.serialization.qc_schema import ( + QnnExecuTorchOpPackageInfo, +) from executorch.backends.qualcomm.serialization.qc_schema_serialize import ( flatbuffer_to_option, option_to_flatbuffer, @@ -35,14 +38,20 @@ @final class QnnBackend(BackendDetails): @staticmethod - def _build_op_wrappers(edge_program: ExportedProgram, enable_tensor_dump: bool): + def _build_op_wrappers( + edge_program: ExportedProgram, + enable_tensor_dump: bool, + op_package_infos: List[QnnExecuTorchOpPackageInfo], + ): # QNN Delegate Specific Passes graph_module = 
QnnPassManager().transform_for_preprocess_pipeline(edge_program) assert graph_module is not None nodes_to_wrappers = defaultdict(dict) node_visitors = get_node_visitors( - edge_program, enable_tensor_dump=enable_tensor_dump + edge_program, + enable_tensor_dump=enable_tensor_dump, + op_package_infos=op_package_infos, ) py_op_wrapper_list = [] for node in graph_module.graph.nodes: @@ -95,8 +104,11 @@ def preprocess( option = generate_qnn_executorch_option(compile_specs) qnn_manager = PyQnnManager.QnnManager(option) qnn_manager.Init() + obj_options = flatbuffer_to_option(option) py_op_wrapper_list = QnnBackend._build_op_wrappers( - edge_program, qnn_manager.IsTensorDump() + edge_program, + qnn_manager.IsTensorDump(), + obj_options.op_package_options.op_package_infos, ) qnn_context_binary = qnn_manager.Compile( @@ -104,7 +116,6 @@ def preprocess( [[py_op_wrapper.GetOpWrapper() for py_op_wrapper in py_op_wrapper_list]], ) - obj_options = flatbuffer_to_option(option) if obj_options.saver: exit( f"Record all QNN API calls from saver backend at: {obj_options.saver_output_dir}" @@ -154,7 +165,9 @@ def preprocess_multimethod( for j, programs in enumerate(edge_programs.values()): logger.info(f"Processing Method({j}): ({i+1}/{num_sub_graphs})") py_op_wrappers = QnnBackend._build_op_wrappers( - programs[i], qnn_manager.IsTensorDump() + programs[i], + qnn_manager.IsTensorDump(), + option.op_package_options.op_package_infos, ) py_op_wrapper_list.append( [py_op_wrapper.GetOpWrapper() for py_op_wrapper in py_op_wrappers] diff --git a/backends/qualcomm/runtime/QnnManager.cpp b/backends/qualcomm/runtime/QnnManager.cpp index 600bc072b06..0f64e8b9cce 100644 --- a/backends/qualcomm/runtime/QnnManager.cpp +++ b/backends/qualcomm/runtime/QnnManager.cpp @@ -75,6 +75,9 @@ QnnManager::QnnManager( "Is on-device graph construction: %d", options->online_prepare()); QNN_EXECUTORCH_LOG_INFO( "Enable shared buffer: %d", options->shared_buffer()); + QNN_EXECUTORCH_LOG_INFO( + "The number of op 
packages: %d", + options_->op_package_options()->op_package_infos()->size()); } if (library_path.empty()) { @@ -308,7 +311,8 @@ Error QnnManager::Init() { Internal, "Fail to configure Qnn backend cache"); ET_CHECK_OR_RETURN_ERROR( - backend_params_ptr_->qnn_backend_ptr_->Configure() == Error::Ok, + backend_params_ptr_->qnn_backend_ptr_->Configure( + options_->op_package_options()) == Error::Ok, Internal, "Fail to configure Qnn backend"); ET_CHECK_OR_RETURN_ERROR( diff --git a/backends/qualcomm/runtime/backends/CMakeLists.txt b/backends/qualcomm/runtime/backends/CMakeLists.txt index e4e0f6ada16..2497aa48340 100644 --- a/backends/qualcomm/runtime/backends/CMakeLists.txt +++ b/backends/qualcomm/runtime/backends/CMakeLists.txt @@ -96,6 +96,13 @@ target_sources( ${HOST_ARCHITECTURE}/HtpGraphCustomConfig.cpp ) +# qnn_op_package_manager +target_sources( + qnn_op_package_manager + PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnOpPackageManager.h + PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnOpPackageManager.cpp +) + # qnn_backend target_sources( qnn_backend diff --git a/backends/qualcomm/runtime/backends/QnnBackendCommon.cpp b/backends/qualcomm/runtime/backends/QnnBackendCommon.cpp index 310e38d1744..960bbd9513e 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendCommon.cpp +++ b/backends/qualcomm/runtime/backends/QnnBackendCommon.cpp @@ -30,7 +30,46 @@ QnnBackend::~QnnBackend() { } } -Error QnnBackend::Configure() { +void QnnBackend::BackendRegisterOpPackage( + const flatbuffers::Vector< + flatbuffers::Offset>* + op_packages_infos) { + const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + Qnn_ErrorHandle_t error = QNN_SUCCESS; + QnnExecuTorchOpPackagePlatform current_platform = + QnnExecuTorchOpPackagePlatform::UNKNOWN; +#if defined(__x86_64__) + current_platform = QnnExecuTorchOpPackagePlatform::X86_64; +#elif defined(__ANDROID__) + current_platform = QnnExecuTorchOpPackagePlatform::AARCH64_ANDROID; +#endif + if (current_platform == 
QnnExecuTorchOpPackagePlatform::UNKNOWN) + QNN_EXECUTORCH_LOG_ERROR( + "Failed to detect the platform. Only support x86_64 or android."); + for (const auto op_package_info : *op_packages_infos) { + if (current_platform != op_package_info->platform() || + op_package_manager_.Has(op_package_info->op_package_path()->c_str())) + continue; + + error = qnn_interface.qnn_backend_register_op_package( + handle_, + op_package_info->op_package_path()->c_str(), + op_package_info->interface_provider()->c_str(), + EnumNameQnnExecuTorchOpPackageTarget(op_package_info->target())); + if (error != QNN_SUCCESS) { + QNN_EXECUTORCH_LOG_ERROR( + "Failed to register op package: " + "%s , error=%d", + op_package_info->op_package_path()->c_str(), + QNN_GET_ERROR_CODE(error)); + } else { + op_package_manager_.Add(op_package_info->op_package_path()->c_str()); + } + } +} + +Error QnnBackend::Configure( + const QnnExecuTorchOpPackageOptions* op_package_options) { // create qnn backend const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; @@ -54,6 +93,11 @@ Error QnnBackend::Configure() { QNN_GET_ERROR_CODE(error)); return Error::Internal; } + + if (op_package_options->op_package_infos()->size() > 0) { + BackendRegisterOpPackage(op_package_options->op_package_infos()); + } + return Error::Ok; } diff --git a/backends/qualcomm/runtime/backends/QnnBackendCommon.h b/backends/qualcomm/runtime/backends/QnnBackendCommon.h index 58bdee10846..a66119dab22 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendCommon.h +++ b/backends/qualcomm/runtime/backends/QnnBackendCommon.h @@ -7,10 +7,12 @@ */ #pragma once +#include #include #include #include - +#include +#include #include #include "HTP/QnnHtpCommon.h" @@ -36,7 +38,8 @@ class QnnBackend { return false; } - executorch::runtime::Error Configure(); + executorch::runtime::Error Configure( + const QnnExecuTorchOpPackageOptions* op_package_options); Qnn_ErrorHandle_t BackendValidateOpConfig(const 
Qnn_OpConfig_t& op_config) { return implementation_.GetQnnInterface().qnn_backend_validate_op_config( @@ -57,8 +60,13 @@ class QnnBackend { }; private: + void BackendRegisterOpPackage( + const flatbuffers::Vector< + flatbuffers::Offset>* + op_packages_info); Qnn_BackendHandle_t handle_; const QnnImplementation& implementation_; + QnnOpPackageManager op_package_manager_; QnnLogger* logger_; executorch::runtime::Error VersionChecker( const Qnn_Version_t& qnn_version, diff --git a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp index feaf911c541..2fbb2243d8d 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp +++ b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp @@ -25,13 +25,12 @@ std::unique_ptr QnnBackendFactory::Create( switch (options->backend_options()->backend_type()) { case QnnExecuTorchBackendType::kHtpBackend: { auto htp_options = options->backend_options()->htp_options(); + const std::string skel_library_dir = + htp_options->skel_library_dir()->str(); + if (!skel_library_dir.empty()) { + setenv("ADSP_LIBRARY_PATH", skel_library_dir.c_str(), /*overwrite=*/1); + } if (options->log_level() >= QnnExecuTorchLogLevel::kLogLevelInfo) { - const std::string skel_library_dir = - htp_options->skel_library_dir()->str(); - if (!skel_library_dir.empty()) { - setenv( - "ADSP_LIBRARY_PATH", skel_library_dir.c_str(), /*overwrite=*/1); - } QNN_EXECUTORCH_LOG_INFO( "skel_library_dir: %s", skel_library_dir.c_str()); QNN_EXECUTORCH_LOG_INFO( diff --git a/backends/qualcomm/runtime/backends/QnnImplementation.cpp b/backends/qualcomm/runtime/backends/QnnImplementation.cpp index 7add5e744f9..a9136a83c9c 100644 --- a/backends/qualcomm/runtime/backends/QnnImplementation.cpp +++ b/backends/qualcomm/runtime/backends/QnnImplementation.cpp @@ -51,8 +51,16 @@ Error QnnImplementation::StartBackend( const std::string& lib_path, const QnnSaver_Config_t** saver_config) { Qnn_ErrorHandle_t error = 
QNN_SUCCESS; - void* lib_handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL); - + // RTLD_GLOBAL is needed on x86 as HTP op package has a requirement for the + // symbols in backend to be visible. Using RTLD_LOCAL on Android to allow full + // unloading of HTP backend shared library on dlclose() as RTLD_GLOBAL isn't + // letting it happen. + void* lib_handle = nullptr; +#if defined(__ANDROID__) + lib_handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_LOCAL); +#else + lib_handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL); +#endif if (lib_handle == nullptr) { QNN_EXECUTORCH_LOG_ERROR( "Cannot Open QNN library %s, with error: %s", diff --git a/backends/qualcomm/runtime/backends/QnnOpPackageManager.cpp b/backends/qualcomm/runtime/backends/QnnOpPackageManager.cpp new file mode 100644 index 00000000000..f0fe7ab34de --- /dev/null +++ b/backends/qualcomm/runtime/backends/QnnOpPackageManager.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ +#include +namespace executorch { +namespace backends { +namespace qnn { + +bool QnnOpPackageManager::Add(std::string qnn_op_name) { + const std::lock_guard lock(table_mutex_); + std::pair ret = + qnn_op_package_path_set_.emplace(qnn_op_name); + return ret.second; +} + +bool QnnOpPackageManager::Has(std::string qnn_op_name) { + const std::lock_guard lock(table_mutex_); + return qnn_op_package_path_set_.count(qnn_op_name) > 0; +} + +bool QnnOpPackageManager::Erase(std::string qnn_op_name) { + const std::lock_guard lock(table_mutex_); + return qnn_op_package_path_set_.erase(qnn_op_name) > 0; +} + +void QnnOpPackageManager::Clear() { + const std::lock_guard lock(table_mutex_); + qnn_op_package_path_set_.clear(); +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/QnnOpPackageManager.h b/backends/qualcomm/runtime/backends/QnnOpPackageManager.h new file mode 100644 index 00000000000..02e522db365 --- /dev/null +++ b/backends/qualcomm/runtime/backends/QnnOpPackageManager.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ +#pragma once +#include +#include + +namespace executorch { +namespace backends { +namespace qnn { +class QnnOpPackageManager { + public: + QnnOpPackageManager() = default; + ~QnnOpPackageManager() = default; + + QnnOpPackageManager(const QnnOpPackageManager& rhs) = delete; + QnnOpPackageManager(QnnOpPackageManager&& rhs) = delete; + QnnOpPackageManager& operator=(const QnnOpPackageManager& rhs) = delete; + QnnOpPackageManager& operator=(QnnOpPackageManager&& rhs) = delete; + + bool Add(std::string qnn_op_name); + + bool Has(std::string qnn_op_name); + + bool Erase(std::string qnn_op_name); + + void Clear(); + + private: + std::unordered_set qnn_op_package_path_set_; + std::mutex table_mutex_; +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp b/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp index bf73a77f9f2..050a679e62a 100644 --- a/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp +++ b/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp @@ -70,7 +70,8 @@ Error QnnDlcManager::Configure() { Internal, "Fail to configure Qnn backend cache"); ET_CHECK_OR_RETURN_ERROR( - backend_params_ptr_->qnn_backend_ptr_->Configure() == Error::Ok, + backend_params_ptr_->qnn_backend_ptr_->Configure( + options_->op_package_options()) == Error::Ok, Internal, "Fail to configure Qnn backend"); ET_CHECK_OR_RETURN_ERROR( diff --git a/backends/qualcomm/serialization/qc_compiler_spec.fbs b/backends/qualcomm/serialization/qc_compiler_spec.fbs index 656bb5c76af..8aeaa060a50 100644 --- a/backends/qualcomm/serialization/qc_compiler_spec.fbs +++ b/backends/qualcomm/serialization/qc_compiler_spec.fbs @@ -151,6 +151,51 @@ enum QnnExecuTorchProfileLevel: int { kProfileOptrace, } +/// The target of the op package library. 
+enum QnnExecuTorchOpPackageTarget: int { + UNKNOWN = 0, + CPU, + HTP, +} + +/// The platform of the op package library. +enum QnnExecuTorchOpPackagePlatform: int { + UNKNOWN = 0, + X86_64, + AARCH64_ANDROID, +} + + +table QnnExecuTorchOpPackageInfo { + /// The name of the op package. + op_package_name:string; + + /// The path on disk to the op package library. + op_package_path:string; + + /// The name of a function in the op package library which satisfies the + /// QnnOpPackage_InterfaceProvider_t interface. + interface_provider:string; + + /// The target which this op package library was compiled for. + target:QnnExecuTorchOpPackageTarget; + + /// The name of torch operator. + custom_op_name:string; + + /// The corresponding op type name defined in the op package. + qnn_op_type_name:string; + + /// The platform which this op package library was compiled for. + platform:QnnExecuTorchOpPackagePlatform; +} + + +table QnnExecuTorchOpPackageOptions { + /// An array of QnnExecuTorchOpPackageInfo structures. + op_package_infos:[QnnExecuTorchOpPackageInfo]; +} + /// QNN backends currently supported table QnnExecuTorchBackendOptions { /// The backend QNN library to open and execute the graph with. This is a @@ -198,6 +243,9 @@ table QnnExecuTorchOptions { // Path to saver output folder saver_output_dir:string; + + /// Optional structure to specify op packages loaded and used by the backend. 
+ op_package_options:QnnExecuTorchOpPackageOptions; } root_type QnnExecuTorchOptions; diff --git a/backends/qualcomm/serialization/qc_schema.py b/backends/qualcomm/serialization/qc_schema.py index 84ce23701ef..f3b9e2cc1a5 100644 --- a/backends/qualcomm/serialization/qc_schema.py +++ b/backends/qualcomm/serialization/qc_schema.py @@ -145,6 +145,36 @@ class QnnExecuTorchBackendOptions: htp_options: QnnExecuTorchHtpBackendOptions +@unique +class QnnExecuTorchOpPackageTarget(IntEnum): + UNKNOWN = 0 + CPU = 1 + HTP = 2 + + +@unique +class QnnExecuTorchOpPackagePlatform(IntEnum): + UNKNOWN = 0 + X86_64 = 1 + AARCH64_ANDROID = 2 + + +@dataclass +class QnnExecuTorchOpPackageInfo: + op_package_name: str = "" + op_package_path: str = "" + interface_provider: str = "" + target: QnnExecuTorchOpPackageTarget = QnnExecuTorchOpPackageTarget.UNKNOWN + custom_op_name: str = "" + qnn_op_type_name: str = "" + platform: QnnExecuTorchOpPackagePlatform = QnnExecuTorchOpPackagePlatform.UNKNOWN + + +@dataclass +class QnnExecuTorchOpPackageOptions: + op_package_infos: List[QnnExecuTorchOpPackageInfo] = field(default_factory=list) + + @dataclass class QnnExecuTorchOptions: soc_info: SocInfo @@ -159,3 +189,6 @@ class QnnExecuTorchOptions: is_from_context_binary: bool = False saver: bool = False saver_output_dir: str = "saver_output" + op_package_options: QnnExecuTorchOpPackageOptions = field( + default_factory=QnnExecuTorchOpPackageOptions + ) diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py index 71d953a5a7b..ece8c8f347d 100644 --- a/backends/qualcomm/tests/test_qnn_delegate.py +++ b/backends/qualcomm/tests/test_qnn_delegate.py @@ -76,7 +76,7 @@ FoldQDQ, TagQuantIO, ) -from executorch.backends.qualcomm.builders.node_visitor import get_node_visitors +from executorch.backends.qualcomm.builders.node_visitor_manager import get_node_visitors from executorch.backends.qualcomm.debugger.utils import DrawGraph from 
executorch.examples.models.deeplab_v3 import DeepLabV3ResNet101Model from executorch.examples.models.edsr import EdsrModel @@ -3729,16 +3729,6 @@ def test_qnn_backend_generate_optrace(self): class TestExampleLLMScript(TestQNN): - def required_envs(self, conditions=None) -> bool: - conditions = [] if conditions is None else conditions - return all( - [ - self.executorch_root, - self.artifact_dir, - *conditions, - ] - ) - def test_llama3_2_1b(self): if not self.required_envs(): self.skipTest("missing required envs") @@ -3896,16 +3886,6 @@ def test_llama_stories_110m(self): class TestExampleOssScript(TestQNN): - def required_envs(self, conditions=None) -> bool: - conditions = [] if conditions is None else conditions - return all( - [ - self.executorch_root, - self.artifact_dir, - *conditions, - ] - ) - def test_conv_former(self): if not self.required_envs([self.image_dataset]): self.skipTest("missing required envs") @@ -4637,16 +4617,6 @@ def test_ssd300_vgg16(self): class TestExampleQaihubScript(TestQNN): - def required_envs(self, conditions=None) -> bool: - conditions = [] if conditions is None else conditions - return all( - [ - self.executorch_root, - self.artifact_dir, - *conditions, - ] - ) - def test_utils_export(self): with tempfile.TemporaryDirectory() as tmp_dir: module = ContextBinaryExample() # noqa: F405 @@ -4875,16 +4845,6 @@ def test_stable_diffusion(self): class TestExampleScript(TestQNN): - def required_envs(self, conditions=None) -> bool: - conditions = [] if conditions is None else conditions - return all( - [ - self.executorch_root, - self.artifact_dir, - *conditions, - ] - ) - def test_mobilenet_v2(self): if not self.required_envs([self.image_dataset]): self.skipTest("missing required envs") @@ -5307,6 +5267,40 @@ def required_envs(self, conditions=None) -> bool: ] ) + def test_custom_op(self): + if not self.required_envs([self.op_package_dir]): + self.skipTest("missing required envs") + cmds = [ + "python", + 
f"{self.executorch_root}/examples/qualcomm/custom_op/custom_ops_1.py", + "--artifact", + self.artifact_dir, + "--build_folder", + self.build_folder, + "--device", + self.device, + "--model", + self.model, + "--ip", + self.ip, + "--port", + str(self.port), + "--op_package_dir", + self.op_package_dir, + "--build_op_package", + ] + if self.host: + cmds.extend(["--host", self.host]) + if self.enable_x86_64: + cmds.extend(["--enable_x86_64"]) + + p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL) + with Listener((self.ip, self.port)) as listener: + conn = listener.accept() + p.communicate() + msg = json.loads(conn.recv()) + self.assertTrue(msg["is_close"]) + def test_debugger_generate_optrace(self): cmds = [ "python", @@ -5396,6 +5390,13 @@ def setup_environment(): help="Path to open source software model repository", type=str, ) + parser.add_argument( + "-d", + "--op_package_dir", + help="Path to operator package which generates from qnn-op-package-generator", + default="", + type=str, + ) parser.add_argument( "--pre_gen_pte", @@ -5429,7 +5430,7 @@ def setup_environment(): TestQNN.compile_only = args.compile_only TestQNN.pre_gen_pte = args.pre_gen_pte TestQNN.llama_artifacts = args.llama_artifacts - + TestQNN.op_package_dir = args.op_package_dir return sys.argv[:1] + ns_args diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py index f02aed25cd7..792ee1b90a5 100644 --- a/backends/qualcomm/tests/utils.py +++ b/backends/qualcomm/tests/utils.py @@ -185,6 +185,7 @@ class TestQNN(unittest.TestCase): image_dataset: str = "" pretrained_weight: str = "" enable_profile: bool = False + op_package_dir: str = "" online_prepare: bool = False use_8a8w: str = "8a8w" use_16a16w: str = "16a16w" @@ -238,6 +239,16 @@ def _save_model_and_expected_output( return input_list, ref_outputs, pte_fname + def required_envs(self, conditions=None) -> bool: + conditions = [] if conditions is None else conditions + return all( + [ + self.executorch_root, + 
self.artifact_dir, + *conditions, + ] + ) + def verify_output( # noqa: C901 self, module: torch.nn.Module, @@ -250,6 +261,7 @@ def verify_output( # noqa: C901 input_encodings: Tuple = (), output_encodings: Tuple = (), check_io_shape: bool = False, + op_package_paths: List[str] = None, ): with tempfile.TemporaryDirectory() as tmp_dir: ( @@ -419,7 +431,11 @@ def validate_intermediate_tensor(): else None ), ) - adb.push(inputs=[processed_inputs], input_list=input_list) + adb.push( + inputs=[processed_inputs], + input_list=input_list, + files=op_package_paths, + ) adb.execute(method_index=method_index) adb.pull(output_path=tmp_dir, callback=post_process) self._assert_outputs_equal(outputs, ref_outputs) diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py index 7ecef7ababe..f37e1c6b9ec 100644 --- a/backends/qualcomm/utils/utils.py +++ b/backends/qualcomm/utils/utils.py @@ -37,6 +37,7 @@ QnnExecuTorchHtpPerformanceMode, QnnExecuTorchHtpPrecision, QnnExecuTorchLogLevel, + QnnExecuTorchOpPackageOptions, QnnExecuTorchOptions, QnnExecuTorchProfileLevel, ) @@ -909,6 +910,7 @@ def generate_qnn_executorch_compiler_spec( shared_buffer: bool = False, is_from_context_binary: bool = False, graph_name: str = "forward", + op_package_options: QnnExecuTorchOpPackageOptions = None, ) -> List[CompileSpec]: """ Helper function generating compiler specs for Qualcomm AI Engine Direct @@ -937,6 +939,8 @@ def generate_qnn_executorch_compiler_spec( and backend for graph I/O. is_from_context_binary: True if current graph comes from pre-built context binary. graph_name: Assign unique graph name if lowering multiple methods. + op_package_options: Optional structure to specify op packages + loaded and used by the backend. Returns: List[CompileSpec]: Compiler specs for Qualcomm AI Engine Direct. 
@@ -996,6 +1000,9 @@ def generate_qnn_executorch_compiler_spec( qnn_executorch_options.online_prepare = online_prepare qnn_executorch_options.is_from_context_binary = is_from_context_binary + if op_package_options and len(op_package_options.op_package_infos) > 0: + qnn_executorch_options.op_package_options = op_package_options + return [ CompileSpec(QCOM_QNN_COMPILE_SPEC, option_to_flatbuffer(qnn_executorch_options)) ] diff --git a/examples/qualcomm/custom_op/README.md b/examples/qualcomm/custom_op/README.md new file mode 100644 index 00000000000..e3d6b216d8b --- /dev/null +++ b/examples/qualcomm/custom_op/README.md @@ -0,0 +1,97 @@ +# Custom Operator Support +The Qualcomm AI Engine Direct Backend in ExecuTorch supports custom PyTorch operators via the Qualcomm AI Engine Direct Op Package mechanism. Custom PyTorch operators, utilizing the torch.library API, can be successfully delegated and supported through user-written op packages. Additionally, built-in PyTorch nodes can be overridden by these op packages. + +Note: The Qualcomm AI Engine Direct SDK is required to compile an OP package. + +This folder contains examples demonstrating how to register custom operators into PyTorch and how to register their op packages into the Qualcomm AI Engine Direct Backend in ExecuTorch. +## Prerequisite + +- Please finish tutorial [Setting up executorch](https://pytorch.org/executorch/stable/getting-started-setup). + +- Please finish [setup QNN backend](../../../docs/source/backends-qualcomm.md). + +- Please follow [the instructions to install proper version of Hexagon SDK and Hexagon Tools.](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/linux_setup.html#htp-and-dsp) + - This example is verified with SM8650 (Snapdragon 8 Gen 3). 
+ - Install hexagon-sdk-5.4.0, hexagon-sdk-6.0.0, and hexagon tool 8.8.02 + ```bash + # install hexagon sdk 5.4.0 + qpm-cli --install hexagonsdk5.x --version 5.4.0.3 --path /path/to/Qualcomm/Hexagon_SDK/hexagon-sdk-5.4.0 + # install hexagon sdk 6.0.0 + qpm-cli --install hexagonsdk6.x --version 6.0.0.2 --path /path/to/Qualcomm/Hexagon_SDK/hexagon-sdk-6.0.0 + # install hexagon tool 8.8.02 + qpm-cli --extract hexagon8.8 --version 8.8.02.1 --path /path/to/Qualcomm/Hexagon_SDK/hexagon-sdk-6.0.0/tools/HEXAGON_Tools/8.8.02 + ``` + +## Setup environment variables +`$HEXAGON_SDK_ROOT` refers to the root of the specified version of Hexagon SDK, i.e., the directory containing `readme.txt` + +`$X86_CXX` refers to the clang++ compiler, verified with clang++9 + +```bash +export HEXAGON_SDK_ROOT=/path/to/Qualcomm/Hexagon_SDK/hexagon-sdk-5.4.0 +export X86_CXX=/path/to/clang-9.0.0/bin/clang++ +``` + + +## Instructions to build and run the example +Use the following command, we can get the op package for the custom op `ExampleCustomOp`. And then compiling the custom model containing the custom op `torch.ops.my_ops.mul3.default` to Qualcomm AI Engine Direct binary with the op package. + +```bash +python3 examples/qualcomm/custom_op/custom_ops_1.py --build_folder build-android -s -H -m SM8650 --op_package_dir examples/qualcomm/custom_op/example_op_package_htp/ExampleOpPackage --build_op_package +``` + +## How to quantize custom op in Qualcomm AI Engine Direct backend +Use the custom annotation in Qnn Quantizer +```python +quantizer = make_quantizer( + quant_dtype=quant_dtype, custom_annotations=(annotate_custom,) +) +``` + +## Generating Op Packages +To generate operation (op) packages, follow these steps: + +1. Define an XML OpDef Configuration File: + - Create an XML file that describes the package information, including the package name, version, and domain. + - Specify the operations the package contains. 
Refer to [the example op package XML file](example_op_package_htp/ExampleOpPackage/config/example_op_package_htp.xml) for guidance. +2. Generate Skeleton Sample Code: + - Once the XML file is fully defined according to the specifications, pass it as an argument to the `qnn-op-package-generator` tool using the --config_path or -p option. + - This will generate the skeleton sample code. +3. Implement the Operations: + - The generated interface generally does not require extra implementation. + - The source files will contain empty function bodies that need to be completed by users. Refer to [the example op package for implementation details](example_op_package_htp/ExampleOpPackage/src/ops/ExampleCustomOp.cpp). +4. Support Custom PyTorch Operators: + - To support the parameters of custom PyTorch operators, a custom op builder is generated from the meta and `_schema.argument` of `torch.fx.Node`. + - Ensure that the OpDef of the op package aligns with the schema of the custom PyTorch operators. + +## Op package format +### Inputs +in[0]…in[m-1] + +The same number of input tensors as defined in the PyTorch custom op, where ``m`` is +the number of inputs. + +* Mandatory: true +* Data type: backend specific +* Shape: Any + +### Parameters + +Optionally, define one or more parameters for the operation. +* Mandatory: true +* Data type: backend specific +* Shape: Any + +### Outputs +out[0] + +For now, only one output tensor is supported. + +* Mandatory: true +* Data type: backend specific +* Shape: Any + +Consult the Qualcomm AI Engine Direct documentation for information on [generating op packages](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/op_def_schema.html). + +## Registering Op Packages +After an op package library has been generated, certain information needs to be passed to the `compile_spec` in order to properly delegate the nodes. 
[The example script](custom_ops_1.py) shows how to construct the `QnnExecuTorchOpPackageOptions` and register op packages with the `compile spec`. diff --git a/examples/qualcomm/custom_op/custom_ops_1.py b/examples/qualcomm/custom_op/custom_ops_1.py new file mode 100644 index 00000000000..4a865197584 --- /dev/null +++ b/examples/qualcomm/custom_op/custom_ops_1.py @@ -0,0 +1,353 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +"""Example of showcasing registering custom operator through torch library API.""" +import json +import os +import subprocess +import sys +from multiprocessing.connection import Client + +import numpy as np +import torch + +from executorch.backends.qualcomm.quantizer.quantizer import QuantDtype +from executorch.backends.qualcomm.serialization.qc_schema import ( + _soc_info_table, + HtpArch, + QcomChipset, + QnnExecuTorchOpPackageInfo, + QnnExecuTorchOpPackageOptions, + QnnExecuTorchOpPackagePlatform, + QnnExecuTorchOpPackageTarget, +) +from executorch.examples.qualcomm.utils import ( + build_executorch_binary, + generate_inputs, + make_output_dir, + make_quantizer, + setup_common_args_and_variables, + SimpleADB, +) +from torch.library import impl, Library + +my_op_lib = Library("my_ops", "DEF") + +# registering an operator that multiplies input tensor by 3 and returns it. +my_op_lib.define("mul3(Tensor input) -> Tensor") # should print 'mul3' + + +@impl(my_op_lib, "mul3", dispatch_key="CompositeExplicitAutograd") +def mul3_impl(a: torch.Tensor) -> torch.Tensor: + return a * 3 + + +# registering the out variant. +my_op_lib.define( + "mul3.out(Tensor input, *, Tensor(a!) 
output) -> Tensor(a!)" +) # should print 'mul3.out' + + +@impl(my_op_lib, "mul3.out", dispatch_key="CompositeExplicitAutograd") +def mul3_out_impl(a: torch.Tensor, *, out: torch.Tensor) -> torch.Tensor: + out.copy_(a) + out.mul_(3) + return out + + +# example model +class Model(torch.nn.Module): + def forward(self, a): + return torch.ops.my_ops.mul3.default(a) + + +def annotate_custom(gm: torch.fx.GraphModule) -> None: + """ + This function is specific for custom op. + The source_fn of the rewritten nn module turns out to be "my_ops.mul3.default" + """ + from executorch.backends.qualcomm.quantizer.annotators import ( + _is_annotated, + QUANT_ANNOTATION_KEY, + ) + + from executorch.backends.qualcomm.quantizer.qconfig import ( + get_ptq_per_channel_quant_config, + ) + from torch.fx import Node + from torchao.quantization.pt2e.quantizer import QuantizationAnnotation + + quantization_config = get_ptq_per_channel_quant_config() + for node in gm.graph.nodes: + if node.target != torch.ops.my_ops.mul3.default: + continue + + # skip annotation if it is already annotated + if _is_annotated([node]): + continue + + input_qspec_map = {} + input_act = node.args[0] + assert isinstance(input_act, Node) + input_spec = quantization_config.input_activation + input_qspec_map[input_act] = input_spec + + node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation( + input_qspec_map=input_qspec_map, + output_qspec=quantization_config.output_activation, + _annotated=True, + ) + + +def create_device_inputs(example_inputs): + input_list = "" + for idx, _ in enumerate(example_inputs): + input_name = f"input_0_{idx}.raw" + input_list += input_name + " " + input_list = input_list.strip() + "\n" + return input_list + + +def _run(cmd, cwd=None): + subprocess.run(cmd, stdout=sys.stdout, cwd=cwd, check=True) + + +def prepare_op_package( + workspace: str, op_package_dir: str, arch: HtpArch, build_op_package: bool +): + if build_op_package: + _run(["rm", "-rf", "build"], cwd=op_package_dir) + 
_run(["make", "htp_x86", "htp_aarch64", f"htp_v{arch}"], cwd=op_package_dir) + _run( + [ + "cp", + f"{op_package_dir}/build/hexagon-v{arch}/libQnnExampleOpPackage.so", + f"{op_package_dir}/build/hexagon-v{arch}/libQnnExampleOpPackage_HTP.so", + ] + ) + + op_package_paths = [ + f"{op_package_dir}/build/hexagon-v{arch}/libQnnExampleOpPackage_HTP.so", + f"{op_package_dir}/build/aarch64-android/libQnnExampleOpPackage.so", + ] + + op_package_infos_HTP = QnnExecuTorchOpPackageInfo() + op_package_infos_HTP.interface_provider = "ExampleOpPackageInterfaceProvider" + op_package_infos_HTP.op_package_name = "ExampleOpPackage" + op_package_infos_HTP.op_package_path = f"{workspace}/libQnnExampleOpPackage_HTP.so" + op_package_infos_HTP.target = QnnExecuTorchOpPackageTarget.HTP + op_package_infos_HTP.custom_op_name = "my_ops.mul3.default" + op_package_infos_HTP.qnn_op_type_name = "ExampleCustomOp" + op_package_infos_HTP.platform = QnnExecuTorchOpPackagePlatform.AARCH64_ANDROID + op_package_infos_aarch64_CPU = QnnExecuTorchOpPackageInfo() + op_package_infos_aarch64_CPU.interface_provider = ( + "ExampleOpPackageInterfaceProvider" + ) + op_package_infos_aarch64_CPU.op_package_name = "ExampleOpPackage" + op_package_infos_aarch64_CPU.op_package_path = ( + f"{workspace}/libQnnExampleOpPackage.so" + ) + op_package_infos_aarch64_CPU.target = QnnExecuTorchOpPackageTarget.CPU + op_package_infos_aarch64_CPU.custom_op_name = "my_ops.mul3.default" + op_package_infos_aarch64_CPU.qnn_op_type_name = "ExampleCustomOp" + op_package_infos_aarch64_CPU.platform = ( + QnnExecuTorchOpPackagePlatform.AARCH64_ANDROID + ) + op_package_infos_x86_CPU = QnnExecuTorchOpPackageInfo() + op_package_infos_x86_CPU.interface_provider = "ExampleOpPackageInterfaceProvider" + op_package_infos_x86_CPU.op_package_name = "ExampleOpPackage" + op_package_infos_x86_CPU.op_package_path = ( + f"{op_package_dir}/build/x86_64-linux-clang/libQnnExampleOpPackage.so" + ) + op_package_infos_x86_CPU.target = 
QnnExecuTorchOpPackageTarget.CPU + op_package_infos_x86_CPU.custom_op_name = "my_ops.mul3.default" + op_package_infos_x86_CPU.qnn_op_type_name = "ExampleCustomOp" + op_package_infos_x86_CPU.platform = QnnExecuTorchOpPackagePlatform.X86_64 + op_package_options = QnnExecuTorchOpPackageOptions() + op_package_options.op_package_infos = [ + op_package_infos_x86_CPU, + op_package_infos_aarch64_CPU, + op_package_infos_HTP, + ] + + return op_package_options, op_package_paths + + +def main(args): + if args.build_op_package: + if "HEXAGON_SDK_ROOT" not in os.environ: + raise RuntimeError("Environment variable HEXAGON_SDK_ROOT must be set") + print(f"HEXAGON_SDK_ROOT={os.getenv('HEXAGON_SDK_ROOT')}") + + if "ANDROID_NDK_ROOT" not in os.environ: + raise RuntimeError("Environment variable ANDROID_NDK_ROOT must be set") + print(f"ANDROID_NDK_ROOT={os.getenv('ANDROID_NDK_ROOT')}") + + # ensure the working directory exist. + os.makedirs(args.artifact, exist_ok=True) + + if not args.compile_only and args.device is None: + raise RuntimeError( + "device serial is required if not compile only. " + "Please specify a device serial by -s/--device argument." 
+ ) + + quant_dtype = QuantDtype.use_8a8w + if args.use_fp16: + quant_dtype = None + + instance = Model() + pte_filename = "custom_qnn" + sample_input = (torch.ones(1, 32, 28, 28),) + workspace = f"/data/local/tmp/executorch/{pte_filename}" + + input_list = create_device_inputs(sample_input) + soc_info = _soc_info_table[getattr(QcomChipset, args.model)] + + op_package_options, op_package_paths = prepare_op_package( + workspace, + args.op_package_dir, + soc_info.htp_info.htp_arch, + args.build_op_package, + ) + quantizer = make_quantizer( + quant_dtype=quant_dtype, custom_annotations=(annotate_custom,) + ) + + build_executorch_binary( + instance, + sample_input, + args.model, + f"{args.artifact}/{pte_filename}", + sample_input, + op_package_options=op_package_options, + quant_dtype=quant_dtype, + custom_quantizer=quantizer, + ) + + if args.compile_only: + sys.exit(0) + + # collect output data + output_data_folder = f"{args.artifact}/outputs" + make_output_dir(output_data_folder) + + if args.enable_x86_64: + input_list_filename = "input_list.txt" + input_list = f"{args.artifact}/{input_list}" + generate_inputs(args.artifact, input_list_filename, sample_input, input_list) + qnn_sdk = os.getenv("QNN_SDK_ROOT") + assert qnn_sdk, "QNN_SDK_ROOT was not found in environment variable" + target = "x86_64-linux-clang" + + runner_cmd = " ".join( + [ + f"export LD_LIBRARY_PATH={qnn_sdk}/lib/{target}/:{args.build_folder}/lib &&", + f"./{args.build_folder}/examples/qualcomm/executor_runner/qnn_executor_runner", + f"--model_path {args.artifact}/{pte_filename}.pte", + f"--input_list_path {args.artifact}/{input_list_filename}", + f"--output_folder_path {output_data_folder}", + ] + ) + subprocess.run( + runner_cmd, + # stdout=subprocess.PIPE, + # stderr=subprocess.STDOUT, + shell=True, + executable="/bin/bash", + capture_output=True, + ) + else: + # setup required paths accordingly + # qnn_sdk : QNN SDK path setup in environment variable + # artifact_path : path where artifacts were 
built + # pte_path : path where executorch binary was stored + # device_id : serial number of android device + # workspace : folder for storing artifacts on android device + adb = SimpleADB( + qnn_sdk=os.getenv("QNN_SDK_ROOT"), + build_path=f"{args.build_folder}", + pte_path=f"{args.artifact}/{pte_filename}.pte", + workspace=workspace, + device_id=args.device, + host_id=args.host, + soc_model=args.model, + ) + adb.push(inputs=sample_input, input_list=input_list, files=op_package_paths) + adb.execute() + adb.pull(output_path=args.artifact) + + x86_golden = instance(*sample_input) + device_output = torch.from_numpy( + np.fromfile( + os.path.join(output_data_folder, "output_0_0.raw"), dtype=np.float32 + ) + ).reshape(x86_golden.size()) + result = torch.all(torch.isclose(x86_golden, device_output, atol=1e-2)).tolist() + + if args.ip and args.port != -1: + with Client((args.ip, args.port)) as conn: + conn.send( + json.dumps( + { + "is_close": result, + } + ) + ) + else: + print(f"is_close? {result}") + if not result: + print(f"x86_golden {x86_golden}") + print(f"device_out {device_output}") + + +if __name__ == "__main__": + parser = setup_common_args_and_variables() + + parser.add_argument( + "-a", + "--artifact", + help="path for storing generated artifacts by this example. Default ./custom_op", + default="./custom_op", + type=str, + ) + + parser.add_argument( + "-d", + "--op_package_dir", + help="Path to operator package which generates from QNN.", + type=str, + required=True, + ) + + parser.add_argument( + "-F", + "--use_fp16", + help="If specified, will run in fp16 precision and discard ptq setting", + action="store_true", + default=False, + ) + + parser.add_argument( + "--build_op_package", + help="Build op package based on op_package_dir. Please set up " + "`HEXAGON_SDK_ROOT` and `ANDROID_NDK_ROOT` environment variable. " + "And add clang compiler into `PATH`. 
Please refer to Qualcomm AI Engine " + "Direct SDK document to get more details", + action="store_true", + default=False, + ) + + args = parser.parse_args() + + try: + main(args) + except Exception as e: + if args.ip and args.port != -1: + with Client((args.ip, args.port)) as conn: + conn.send(json.dumps({"Error": str(e)})) + else: + raise Exception(e) diff --git a/examples/qualcomm/custom_op/example_op_package_htp/ExampleOpPackage/Makefile b/examples/qualcomm/custom_op/example_op_package_htp/ExampleOpPackage/Makefile new file mode 100644 index 00000000000..8d37e042640 --- /dev/null +++ b/examples/qualcomm/custom_op/example_op_package_htp/ExampleOpPackage/Makefile @@ -0,0 +1,364 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# users should provide locations for QNN_INCLUDE and HEXAGON_SDK_ROOT +# export HEXAGON_SDK_ROOT = /path/to/hexagon-sdk + +# check all setup prerequisites if the command goal is not clean +ifneq ($(MAKECMDGOALS),clean) +ifndef QNN_INCLUDE +$(info "INFO: Qnn include not explicitly defined, attempting to use QNN_SDK_ROOT if it is valid") +QNN_INCLUDE := $(QNN_SDK_ROOT)/include/QNN +endif +ifeq ($(wildcard $(QNN_INCLUDE)),) +$(error "ERROR: QNN_INCLUDE path is not set. QNN include paths must be set to obtain BE headers necessary to compile the package") +endif +ifndef QNN_TARGET_LIB +$(info "INFO: Qnn target not explicitly defined, attempting to use QNN_SDK_ROOT if it is valid") +QNN_TARGET_LIB := $(QNN_SDK_ROOT)/lib/aarch64-android +endif +ifeq ($(wildcard $(QNN_TARGET_LIB)),) +ifeq ($(MAKECMDGOALS),htp_aarch64) +$(error "ERROR: QNN_TARGET_LIB is needed to compile package for aarch64") +else ifeq ($(MAKECMDGOALS),all) +$(info "WARNING:QNN_TARGET_LIB may need to be defined to compile packages") +endif +endif + +ifndef HEXAGON_SDK_ROOT +$(error "ERROR: HEXAGON_SDK_ROOT is not set. 
Hexagon-SDK path must be set to the latest hexagon-sdk-x.y.z") +endif + +ifeq ($(wildcard $(HEXAGON_SDK_ROOT)),) +$(error "ERROR: HEXAGON_SDK_ROOT is not set correctly. Please set HEXAGON_SDK_ROOT to latest hexagon-sdk-X.Y.Z path") +endif + +HEXAGON_SDK_BASE := $(dir $(HEXAGON_SDK_ROOT)) + +$(info "HEXAGON_SDK_ROOT is [${HEXAGON_SDK_ROOT}]") +# Users should note that the tools version may change between hexagon sdk versions +# Following combination of SDK and Tool version is supported +HEXAGON_SDK_ROOT_V68 := $(HEXAGON_SDK_BASE)/hexagon-sdk-4.2.0 +HEXAGON_SDK_ROOT_V69 := $(HEXAGON_SDK_BASE)/hexagon-sdk-4.3.0 +HEXAGON_SDK_ROOT_V73 := $(HEXAGON_SDK_BASE)/hexagon-sdk-5.4.0 +HEXAGON_SDK_ROOT_V75 := $(HEXAGON_SDK_BASE)/hexagon-sdk-5.4.0 +HEXAGON_SDK_ROOT_V79 := $(HEXAGON_SDK_BASE)/hexagon-sdk-6.0.0 + +#Updated to point to latest sdk to match with libQnnHtp.so +HEXAGON_SDK_ROOT_X86 := $(HEXAGON_SDK_BASE)/hexagon-sdk-6.0.0 +HEXAGON_TOOLS_VERSION_V68 := 8.4.09 +HEXAGON_TOOLS_VERSION_V69 := 8.5.03 +HEXAGON_TOOLS_VERSION_V73 := 8.6.02 +HEXAGON_TOOLS_VERSION_V75 := 8.7.03 +HEXAGON_TOOLS_VERSION_V79 := 8.8.02 + +#Updated to point to latest sdk to match with libQnnHtp.so +HEXAGON_TOOLS_VERSION_X86 := 8.8.02 + +ifndef ANDROID_NDK_ROOT +ifeq ($(MAKECMDGOALS),htp_aarch64) +$(error "ERROR: ANDROID_NDK_ROOT is not set. Android NDK path must be set to compile package for aarch64") +else ifeq ($(MAKECMDGOALS),all) +$(info "WARNING: ANDROID_NDK_ROOT is not set. Android NDK path must be set to compile package for aarch64") +endif +endif + +ifndef PACKAGE_NAME +export +PACKAGE_NAME := $(notdir $(shell pwd)) +$(info "INFO: No package name defined. 
Using current directory name: $(PACKAGE_NAME) as the package name") +endif + +WORK := build +SRC_DIR := src +OP_SRC_DIR := src/ops +OP_INCLUDE_DIR := ./include +OP_INCLUDES = #$(wildcard $(OP_INCLUDE_DIR)/*.h) user defined if any op specific headers are needed, add -I to common flags +LIBRARY_NAME := libQnn$(PACKAGE_NAME).so +SUPPORTED_TARGETS = x86_64-linux-clang hexagon-v68 hexagon-v69 hexagon-v73 hexagon-v75 hexagon-v79 aarch64-android + + +COMMON_CXX_FLAGS = -std=c++17 -I$(QNN_INCLUDE) -fPIC -Wall -Wreorder -Wno-missing-braces -Wno-unused-function +COMMON_CXX_FLAGS += -Werror -Wno-format -Wno-unused-command-line-argument -fvisibility=default -stdlib=libc++ +COMMON_CXX_FLAGS += -DQNN_API="__attribute__((visibility(\"default\")))" -D__QAIC_HEADER_EXPORT="__attribute__((visibility(\"default\")))" + +X86_LIBNATIVE_RELEASE_DIR := $(HEXAGON_SDK_ROOT_X86)/tools/HEXAGON_Tools/$(HEXAGON_TOOLS_VERSION_X86)/Tools + +# Ensure hexagon sdk tool version can be retrieved +ifeq ($(wildcard $(X86_LIBNATIVE_RELEASE_DIR)/.),) +$(error "Cannot retrieve hexagon tools from: $(X86_LIBNATIVE_RELEASE_DIR). \ + \ + Please check that hexagon tools version is correct. Expected: $(HEXAGON_TOOLS_VERSION_X86)") +endif + +#Check tools for hexagon_v68 are present. +ifeq ($(MAKECMDGOALS),htp_v68) +ifeq ($(wildcard $(HEXAGON_SDK_ROOT_V68)),) +$(error "ERROR: HEXAGON_SDK_ROOT_V68 is set incorrectly. Cannot retrieve $(HEXAGON_SDK_ROOT_V68)") +endif +endif + +ifeq ($(MAKECMDGOALS),htp_v69) +ifeq ($(wildcard $(HEXAGON_SDK_ROOT_V69)),) +$(error "ERROR: HEXAGON_SDK_ROOT_V69 is set incorrectly. Cannot retrieve $(HEXAGON_SDK_ROOT_V69)") +endif +endif + +ifeq ($(MAKECMDGOALS),htp_v73) +ifeq ($(wildcard $(HEXAGON_SDK_ROOT_V73)),) +$(error "ERROR: HEXAGON_SDK_ROOT_V73 is set incorrectly. Cannot retrieve $(HEXAGON_SDK_ROOT_V73)") +endif +endif + +ifeq ($(MAKECMDGOALS),htp_v75) +ifeq ($(wildcard $(HEXAGON_SDK_ROOT_V75)),) +$(error "ERROR: HEXAGON_SDK_ROOT_V75 is set incorrectly. 
Cannot retrieve $(HEXAGON_SDK_ROOT_V75)") +endif +endif + +#Check tools for hexagon_v79 are present. +ifeq ($(MAKECMDGOALS),htp_v79) +ifeq ($(wildcard $(HEXAGON_SDK_ROOT_V79)),) +$(error "ERROR: HEXAGON_SDK_ROOT_V79 is set incorrectly. Cannot retrieve $(HEXAGON_SDK_ROOT_V79)") +endif +endif + + + +endif +OP_SOURCES = $(wildcard $(OP_SRC_DIR)/*.cpp) +OTHER_SOURCES = $(wildcard $(SRC_DIR)/*.cpp) +HFILES = $(wildcard $(QNN_INCLUDE)/*.h) +HFILES += $(wildcard $(QNN_INCLUDE)/HTP/*.h) +HFILES += $(wildcard $(QNN_INCLUDE)/HTP/core/*.h) +OP_OBJS = $(patsubst $(SRC_DIR)/%,%,$(patsubst %.cpp,%.o,$(OP_SOURCES))) +OTHER_OBJS = $(patsubst $(SRC_DIR)/%,%,$(patsubst %.cpp,%.o,$(OTHER_SOURCES))) + +#======= Assembly ======== +OP_SOURCES_ASM_X86 += $(wildcard $(OP_SRC_DIR)/x86_asm/*.S) +OP_OBJS_ASM_X86 += $(subst /x86_asm/,/,$(patsubst $(SRC_DIR)/%,%,$(patsubst %.S,%.o,$(OP_SOURCES_ASM_X86)))) +OP_SOURCES_ASM_V68 += $(wildcard $(OP_SRC_DIR)/v68_asm/*.S) +OP_OBJS_ASM_V68 += $(subst /v68_asm/,/,$(patsubst $(SRC_DIR)/%,%,$(patsubst %.S,%.o,$(OP_SOURCES_ASM_V68)))) +OP_SOURCES_ASM_V69 += $(wildcard $(OP_SRC_DIR)/v69_asm/*.S) +OP_OBJS_ASM_V69 += $(subst /v69_asm/,/,$(patsubst $(SRC_DIR)/%,%,$(patsubst %.S,%.o,$(OP_SOURCES_ASM_V69)))) +OP_SOURCES_ASM_V73 += $(wildcard $(OP_SRC_DIR)/v73_asm/*.S) +OP_OBJS_ASM_V73 += $(subst /v73_asm/,/,$(patsubst $(SRC_DIR)/%,%,$(patsubst %.S,%.o,$(OP_SOURCES_ASM_V73)))) +OP_SOURCES_ASM_V75 += $(wildcard $(OP_SRC_DIR)/v75_asm/*.S) +OP_OBJS_ASM_V75 += $(subst /v75_asm/,/,$(patsubst $(SRC_DIR)/%,%,$(patsubst %.S,%.o,$(OP_SOURCES_ASM_V75)))) +OP_SOURCES_ASM_V79 += $(wildcard $(OP_SRC_DIR)/v79_asm/*.S) +OP_OBJS_ASM_V79 += $(subst /v79_asm/,/,$(patsubst $(SRC_DIR)/%,%,$(patsubst %.S,%.o,$(OP_SOURCES_ASM_V79)))) + +OP_SOURCES_ASM_ANDROID += $(wildcard $(OP_SRC_DIR)/android_asm/*.S) +OP_OBJS_ASM_ANDROID += $(subst /android_asm/,/,$(patsubst $(SRC_DIR)/%,%,$(patsubst %.S,%.o,$(OP_SOURCES_ASM_ANDROID)))) + + +all: htp_v73 htp_x86 htp_aarch64 + 
+#============================================================================================================ +# Setup compiler, compiler instructions and linker for x86 +X86_CXX ?= clang++-9 +# Checking if clang++-9 is present. If not switch to clang++ +ifeq ($(shell $(X86_CXX) -v 2>&1 | grep -c "clang version"), 0) + X86_CXX := clang++ +endif +X86_LDFLAGS:= -Wl,--whole-archive -L$(X86_LIBNATIVE_RELEASE_DIR)/libnative/lib -lnative -Wl,--no-whole-archive -lpthread +X86_C_FLAGS := -D__HVXDBL__ -I$(X86_LIBNATIVE_RELEASE_DIR)/libnative/include -ffast-math -DUSE_OS_LINUX +X86_CXX_FLAGS = $(COMMON_CXX_FLAGS) $(X86_C_FLAGS) -fomit-frame-pointer -Wno-invalid-offsetof +linux_objs = +#============================================================================================================ +# Setup compiler, compiler instructions and linker for hexagon +HEXAGON_CXX_FLAGS := $(COMMON_CXX_FLAGS) -mhvx -mhvx-length=128B -mhmx -DUSE_OS_QURT -O2 -Wno-reorder -DPREPARE_DISABLED + +HEXAGON_CXX_FLAGS_V68 := $(HEXAGON_CXX_FLAGS) -mv68 -I$(HEXAGON_SDK_ROOT_V68)/rtos/qurt/computev68/include/qurt -I$(HEXAGON_SDK_ROOT_V68)/rtos/qurt/computev68/include/posix -I$(HEXAGON_SDK_ROOT_V68)/incs -I$(HEXAGON_SDK_ROOT_V68)/incs/stddef +HEXAGON_CXX_FLAGS_V69 := $(HEXAGON_CXX_FLAGS) -mv69 -I$(HEXAGON_SDK_ROOT_V69)/rtos/qurt/computev69/include/qurt -I$(HEXAGON_SDK_ROOT_V69)/rtos/qurt/computev69/include/posix -I$(HEXAGON_SDK_ROOT_V69)/incs -I$(HEXAGON_SDK_ROOT_V69)/incs/stddef +HEXAGON_CXX_FLAGS_V73 := $(HEXAGON_CXX_FLAGS) -mv73 -I$(HEXAGON_SDK_ROOT_V73)/rtos/qurt/computev73/include/qurt -I$(HEXAGON_SDK_ROOT_V73)/rtos/qurt/computev73/include/posix -I$(HEXAGON_SDK_ROOT_V73)/incs -I$(HEXAGON_SDK_ROOT_V73)/incs/stddef +HEXAGON_CXX_FLAGS_V75 := $(HEXAGON_CXX_FLAGS) -mv75 -I$(HEXAGON_SDK_ROOT_V75)/rtos/qurt/computev75/include/qurt -I$(HEXAGON_SDK_ROOT_V75)/rtos/qurt/computev75/include/posix -I$(HEXAGON_SDK_ROOT_V75)/incs -I$(HEXAGON_SDK_ROOT_V75)/incs/stddef +HEXAGON_CXX_FLAGS_V79 := 
$(HEXAGON_CXX_FLAGS) -mv79 -I$(HEXAGON_SDK_ROOT_V79)/rtos/qurt/computev79/include/qurt -I$(HEXAGON_SDK_ROOT_V79)/rtos/qurt/computev79/include/posix -I$(HEXAGON_SDK_ROOT_V79)/incs -I$(HEXAGON_SDK_ROOT_V79)/incs/stddef + +HEXAGON_CXX_V68 := $(HEXAGON_SDK_ROOT_V68)/tools/HEXAGON_Tools/$(HEXAGON_TOOLS_VERSION_V68)/Tools/bin/hexagon-clang++ +HEXAGON_CXX_V69 := $(HEXAGON_SDK_ROOT_V69)/tools/HEXAGON_Tools/$(HEXAGON_TOOLS_VERSION_V69)/Tools/bin/hexagon-clang++ +HEXAGON_CXX_V73 := $(HEXAGON_SDK_ROOT_V73)/tools/HEXAGON_Tools/$(HEXAGON_TOOLS_VERSION_V73)/Tools/bin/hexagon-clang++ +HEXAGON_CXX_V75 := $(HEXAGON_SDK_ROOT_V75)/tools/HEXAGON_Tools/$(HEXAGON_TOOLS_VERSION_V75)/Tools/bin/hexagon-clang++ +HEXAGON_CXX_V79 := $(HEXAGON_SDK_ROOT_V79)/tools/HEXAGON_Tools/$(HEXAGON_TOOLS_VERSION_V79)/Tools/bin/hexagon-clang++ + + +HEX_LDFLAGS = +hexagon_objs = +#============================================================================================================ +# Setup compiler, compiler instructions and linker for aarch64 +AARCH64_C__FLAGS = -D__HVXDBL__ -I$(X86_LIBNATIVE_RELEASE_DIR)/libnative/include -ffast-math -DUSE_OS_LINUX -DANDROID +AARCH64_CXX_FLAGS = $(COMMON_CXX_FLAGS) $(AARCH64_C__FLAGS) -fomit-frame-pointer -Wno-invalid-offsetof -Wno-unused-variable -Wno-unused-parameter -Wno-missing-braces -Wno-sign-compare -Wno-unused-private-field -Wno-unused-variable -Wno-ignored-qualifiers -Wno-missing-field-initializers +ARM_CLANG_OPTS =--target=aarch64-none-linux-android21 --sysroot=$(ANDROID_NDK_ROOT)/toolchains/llvm/prebuilt/linux-x86_64/sysroot -stdlib=libc++ -static-libstdc++ +AARCH64_CXX = $(ANDROID_NDK_ROOT)/toolchains/llvm/prebuilt/linux-x86_64/bin/clang++ $(ARM_CLANG_OPTS) +AARCH64_LDFLAGS = -L$(QNN_TARGET_LIB) -lQnnHtp -lQnnHtpPrepare +aarch64_objs = +#============================================================================================================ +# Setup targets and goals + +htp_x86: X86_BUILD + +htp_v68: HEXAGON_BUILD_V68 + +htp_v69: HEXAGON_BUILD_V69 + 
+htp_v73: HEXAGON_BUILD_V73
+
+htp_v75: HEXAGON_BUILD_V75
+
+htp_v79: HEXAGON_BUILD_V79
+
+
+
+htp_aarch64: AARCH64_BUILD
+
+AARCH64_BUILD: $(WORK)/aarch64-android/$(LIBRARY_NAME)
+
+HEXAGON_BUILD_V68: $(WORK)/hexagon-v68/$(LIBRARY_NAME)
+
+HEXAGON_BUILD_V69: $(WORK)/hexagon-v69/$(LIBRARY_NAME)
+
+HEXAGON_BUILD_V73: $(WORK)/hexagon-v73/$(LIBRARY_NAME)
+
+HEXAGON_BUILD_V75: $(WORK)/hexagon-v75/$(LIBRARY_NAME)
+
+HEXAGON_BUILD_V79: $(WORK)/hexagon-v79/$(LIBRARY_NAME)
+
+
+
+X86_BUILD: $(WORK)/x86_64-linux-clang/$(LIBRARY_NAME)
+
+
+define build_objs =
+ifneq ($(filter $(2),$(SUPPORTED_TARGETS)),)
+$(2)_objs += $(foreach x,$(1),$(WORK)/$(2)/$(x))
+else
+$$(error "Unknown target option provided: $(2): Supported targets are: $(SUPPORTED_TARGETS)")
+endif
+endef
+
+$(eval $(call build_objs,$(OTHER_OBJS),x86_64-linux-clang))
+$(eval $(call build_objs,$(OP_OBJS),x86_64-linux-clang))
+$(eval $(call build_objs,$(OP_OBJS_ASM_X86),x86_64-linux-clang))
+$(eval $(call build_objs,$(OTHER_OBJS),hexagon-v68))
+$(eval $(call build_objs,$(OP_OBJS),hexagon-v68))
+$(eval $(call build_objs,$(OP_OBJS_ASM_V68),hexagon-v68))
+$(eval $(call build_objs,$(OTHER_OBJS),hexagon-v69))
+$(eval $(call build_objs,$(OP_OBJS),hexagon-v69))
+$(eval $(call build_objs,$(OP_OBJS_ASM_V69),hexagon-v69))
+$(eval $(call build_objs,$(OTHER_OBJS),hexagon-v73))
+$(eval $(call build_objs,$(OP_OBJS),hexagon-v73))
+$(eval $(call build_objs,$(OP_OBJS_ASM_V73),hexagon-v73))
+$(eval $(call build_objs,$(OTHER_OBJS),hexagon-v75))
+$(eval $(call build_objs,$(OP_OBJS),hexagon-v75))
+$(eval $(call build_objs,$(OP_OBJS_ASM_V75),hexagon-v75))
+$(eval $(call build_objs,$(OTHER_OBJS),hexagon-v79))
+$(eval $(call build_objs,$(OP_OBJS),hexagon-v79))
+$(eval $(call build_objs,$(OP_OBJS_ASM_V79),hexagon-v79))
+
+$(eval $(call build_objs,$(OTHER_OBJS),aarch64-android))
+$(eval $(call build_objs,$(OP_OBJS),aarch64-android))
+$(eval $(call build_objs,$(OP_OBJS_ASM_ANDROID),aarch64-android))
+
+# x86
+$(WORK)/x86_64-linux-clang 
$(WORK)/hexagon-v68 $(WORK)/hexagon-v69 $(WORK)/hexagon-v73 $(WORK)/hexagon-v75 $(WORK)/hexagon-v79 $(WORK)/aarch64-android: + @mkdir -p $@/ops + +$(WORK)/x86_64-linux-clang/%.o: $(SRC_DIR)/%.cpp | $(WORK)/x86_64-linux-clang + $(X86_CXX) $(X86_CXX_FLAGS) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/x86_64-linux-clang/ops/%.o: $(OP_SRC_DIR)/%.cpp | $(WORK)/x86_64-linux-clang + $(X86_CXX) $(X86_CXX_FLAGS) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/x86_64-linux-clang/ops/%.o: $(OP_SRC_DIR)/x86_asm/%.S | $(WORK)/x86_64-linux-clang + $(X86_CXX) $(X86_CXX_FLAGS) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/x86_64-linux-clang/$(LIBRARY_NAME): $(x86_64-linux-clang_objs) | $(HFILES) + $(X86_CXX) -fPIC -std=c++17 -g -shared -o $@ $^ $(X86_LDFLAGS) + +# v68 +$(WORK)/hexagon-v68/%.o: $(SRC_DIR)/%.cpp | $(WORK)/hexagon-v68 + $(HEXAGON_CXX_V68) $(HEXAGON_CXX_FLAGS_V68) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v68/ops/%.o: $(OP_SRC_DIR)/%.cpp | $(WORK)/hexagon-v68 + $(HEXAGON_CXX_V68) $(HEXAGON_CXX_FLAGS_V68) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v68/ops/%.o: $(OP_SRC_DIR)/v68_asm/%.S | $(WORK)/hexagon-v68 + $(HEXAGON_CXX_V68) $(HEXAGON_CXX_FLAGS_V68) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v68/$(LIBRARY_NAME): $(hexagon-v68_objs) | $(HFILES) + $(HEXAGON_CXX_V68) -fPIC -std=c++17 -g -shared -o $@ $^ $(HEX_LDFLAGS) + +# v69 +$(WORK)/hexagon-v69/%.o: $(SRC_DIR)/%.cpp | $(WORK)/hexagon-v69 + $(HEXAGON_CXX_V69) $(HEXAGON_CXX_FLAGS_V69) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v69/ops/%.o: $(OP_SRC_DIR)/%.cpp | $(WORK)/hexagon-v69 + $(HEXAGON_CXX_V69) $(HEXAGON_CXX_FLAGS_V69) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v69/ops/%.o: $(OP_SRC_DIR)/v69_asm/%.S | $(WORK)/hexagon-v69 + $(HEXAGON_CXX_V69) $(HEXAGON_CXX_FLAGS_V69) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + 
+$(WORK)/hexagon-v69/$(LIBRARY_NAME): $(hexagon-v69_objs) | $(HFILES) + $(HEXAGON_CXX_V69) -fPIC -std=c++17 -g -shared -o $@ $^ $(HEX_LDFLAGS) + +# v73 +$(WORK)/hexagon-v73/%.o: $(SRC_DIR)/%.cpp | $(WORK)/hexagon-v73 + $(HEXAGON_CXX_V73) $(HEXAGON_CXX_FLAGS_V73) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v73/ops/%.o: $(OP_SRC_DIR)/%.cpp | $(WORK)/hexagon-v73 + $(HEXAGON_CXX_V73) $(HEXAGON_CXX_FLAGS_V73) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v73/ops/%.o: $(OP_SRC_DIR)/v73_asm/%.S | $(WORK)/hexagon-v73 + $(HEXAGON_CXX_V73) $(HEXAGON_CXX_FLAGS_V73) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v73/$(LIBRARY_NAME): $(hexagon-v73_objs) | $(HFILES) + $(HEXAGON_CXX_V73) -fPIC -std=c++17 -g -shared -o $@ $^ $(HEX_LDFLAGS) + +#v75 +$(WORK)/hexagon-v75/%.o: $(SRC_DIR)/%.cpp | $(WORK)/hexagon-v75 + $(HEXAGON_CXX_V75) $(HEXAGON_CXX_FLAGS_V75) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v75/ops/%.o: $(OP_SRC_DIR)/%.cpp | $(WORK)/hexagon-v75 + $(HEXAGON_CXX_V75) $(HEXAGON_CXX_FLAGS_V75) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v75/ops/%.o: $(OP_SRC_DIR)/v75_asm/%.S | $(WORK)/hexagon-v75 + $(HEXAGON_CXX_V75) $(HEXAGON_CXX_FLAGS_V75) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v75/$(LIBRARY_NAME): $(hexagon-v75_objs) | $(HFILES) + $(HEXAGON_CXX_V75) -fPIC -std=c++17 -g -shared -o $@ $^ $(HEX_LDFLAGS) + +#v79 +$(WORK)/hexagon-v79/%.o: $(SRC_DIR)/%.cpp | $(WORK)/hexagon-v79 + $(HEXAGON_CXX_V79) $(HEXAGON_CXX_FLAGS_V79) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v79/ops/%.o: $(OP_SRC_DIR)/%.cpp | $(WORK)/hexagon-v79 + $(HEXAGON_CXX_V79) $(HEXAGON_CXX_FLAGS_V79) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v79/ops/%.o: $(OP_SRC_DIR)/v79_asm/%.S | $(WORK)/hexagon-v79 + $(HEXAGON_CXX_V79) $(HEXAGON_CXX_FLAGS_V79) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + 
+$(WORK)/hexagon-v79/$(LIBRARY_NAME): $(hexagon-v79_objs) | $(HFILES) + $(HEXAGON_CXX_V79) -fPIC -std=c++17 -g -shared -o $@ $^ $(HEX_LDFLAGS) + + + +# aarch64 +$(WORK)/aarch64-android/%.o: $(SRC_DIR)/%.cpp | $(WORK)/aarch64-android + $(AARCH64_CXX) $(AARCH64_CXX_FLAGS) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/aarch64-android/ops/%.o: $(OP_SRC_DIR)/%.cpp | $(WORK)/aarch64-android + $(AARCH64_CXX) $(AARCH64_CXX_FLAGS) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/aarch64-android/ops/%.o: $(OP_SRC_DIR)/android_asm/%.S | $(WORK)/aarch64-android + $(AARCH64_CXX) $(AARCH64_CXX_FLAGS) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/aarch64-android/$(LIBRARY_NAME): $(aarch64-android_objs) | $(HFILES) + $(AARCH64_CXX) -fPIC -std=c++17 -g -shared -o $@ $^ $(AARCH64_LDFLAGS) + +clean: + -rm -rf $(WORK) + +.PHONY: all clean diff --git a/examples/qualcomm/custom_op/example_op_package_htp/ExampleOpPackage/config/example_op_package_htp.xml b/examples/qualcomm/custom_op/example_op_package_htp/ExampleOpPackage/config/example_op_package_htp.xml new file mode 100644 index 00000000000..24e17100bf8 --- /dev/null +++ b/examples/qualcomm/custom_op/example_op_package_htp/ExampleOpPackage/config/example_op_package_htp.xml @@ -0,0 +1,59 @@ + + + + + + ExampleCustomOp + + + ExampleCustomOp for testing OP package registration functionality. 
+ + + + + input + + input activation + + true + QNN_DATATYPE_FLOAT_32 + QNN_DATATYPE_UFIXED_POINT_8 + + 4D + NHWC + a tensor of 4 dimension + + + + + output + + output activation + + true + QNN_DATATYPE_FLOAT_32 + QNN_DATATYPE_UFIXED_POINT_8 + + 4D + NHWC + a tensor of 4 dimension + + + + + HTP + + + + + diff --git a/examples/qualcomm/custom_op/example_op_package_htp/ExampleOpPackage/src/ExampleOpPackageInterface.cpp b/examples/qualcomm/custom_op/example_op_package_htp/ExampleOpPackage/src/ExampleOpPackageInterface.cpp new file mode 100644 index 00000000000..8eeca16e982 --- /dev/null +++ b/examples/qualcomm/custom_op/example_op_package_htp/ExampleOpPackage/src/ExampleOpPackageInterface.cpp @@ -0,0 +1,289 @@ +//============================================================================== +// Auto Generated Code for ExampleOpPackage +//============================================================================== + +#include "HTP/QnnHtpCommon.h" +#include "HTP/core/constraints.h" +#include "HTP/core/op_package_feature_support.h" +#include "HTP/core/op_register_ext.h" +#include "HTP/core/optimize.h" +#include "HTP/core/simple_reg.h" +#include "HTP/core/unique_types.h" +#include "QnnOpPackage.h" +#include "QnnSdkBuildId.h" + +DEFINE_UNIQ_TY() +BEGIN_PKG_OPS_OPTS_LIST() + +/** Note that the order of declarations given here defines the order in which + * ops and graph optimizations are registered to the HTP Core. 
Append the latest + * OpName at the bottom + */ +DECLARE_PKG_OPS_OPTS_LIST(PKG_ExampleCustomOp) + +END_PKG_OPS_OPTS_LIST() + +// op package info +static constexpr auto sg_packageName = + THIS_PKG_NAME_STR; // package name passed in as compile flag + +static std::array sg_opNames{{"ExampleCustomOp"}}; + +static Qnn_ApiVersion_t sg_sdkApiVersion = QNN_HTP_API_VERSION_INIT; +static QnnOpPackage_Info_t sg_packageInfo = QNN_OP_PACKAGE_INFO_INIT; + +// global data +static QnnOpPackage_GlobalInfrastructure_t sg_globalInfra = + nullptr; // global infrastructure not in use for now +static bool sg_packageInitialized = false; + +/* + * user provided logging call back function + * currently only supported on linux x86-64 and nonrpc versions + * typedef void (*QnnLog_Callback_t)(const char* fmt, + * QnnLog_Level_t level, + * uint64_t timestamp, + * va_list args); + * usage: if(sg_logInitialized && level <= sg_maxLogLevel) + * sg_logCallback(fmt, level, timestamp, args); + * + * for cross rpc versions, skel side user provided logging call back function + * can be defined as part of op packages. 
maximal log level sg_maxLogLevel + * can be set by Qnn_ErrorHandle_t ExampleOpPackageLogSetLevel(QnnLog_Level_t + * maxLogLevel) + */ +/* + * for alternative logging method provided by HTP core, please refer to log.h + */ +static QnnLog_Callback_t sg_logCallback = + nullptr; // user provided call back function pointer for logging +static QnnLog_Level_t sg_maxLogLevel = + (QnnLog_Level_t)0; // maximal log level used in user provided logging +static bool sg_logInitialized = + false; // tracks whether user provided logging method has been initialized + +/* + * op initialization + * needs to be global in the package + * one initialization per package before any op definitions + * syntax: INIT_PACKAGE_OP_DEF() + */ +INIT_PACKAGE_OP_DEF() + +/* + * optimization initialization + * needs to be global in the package + * one initialization per package before any optimization definitions + * syntax: INIT_PACKAGE_OPTIMIZATION_DEF() + */ +INIT_PACKAGE_OPTIMIZATION_DEF() + +/* + * op parameter order initialization + * needs to be global in the package + * one initialization per package before any op parameter order definitions + * syntax: INIT_PACKAGE_PARAM_ORDER_DEF() + */ +INIT_PACKAGE_PARAM_ORDER_DEF() + +/* + * axis parameter name list + * optional + * needs to be global in the package + * one list per package + * for listing axis parameter names passed into Qnn_AddNode API + * HTP backend auto-adjusts values in axis parameters based on HTP backfilling + * note: HTP backend backfills tensor dimensions to 4 dimensions + * syntax: LIST_PACKAGE_AXIS_PARAMS(...) + * e.g. 
LIST_PACKAGE_AXIS_PARAMS("Axis", "AXIS", "axis") + */ +// LIST_PACKAGE_AXIS_PARAMS() + +/* + * per-channel quantized op name list + * optional + * needs to be global in the package + * one list per package + * for listing op names which support per-channel quantization + * per-axis quantization info of an op is embeded in axisScaleOffsetEncoding + * inside Qnn_Tensor_t types + * HTP backend only supports per-channel scale ops + * i.e. along last dimension, offset is always zero + * if an op name is marked as having per-channel scale support, and in + * QNN_AddNode, at least one input, parameter, or output has + * QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET type: + * then: + * HTP backend will pass to op implementation function the following: + * output(s), input(s), parameter(s), + * outputPerChannelScale(s), inputPerChannelScale(s), + * paramPerChannelScale(s) + * + * optimization rules can be used to remove extra perChannelScale tensors + * + * syntax: LIST_PACKAGE_PER_CHANNEL_QUANTIZED_OPS(...) + * e.g. LIST_PACKAGE_PER_CHANNEL_QUANTIZED_OPS(sg_op1Name, sg_op2Name) + */ + +// LIST_PACKAGE_PER_CHANNEL_QUANTIZED_OPS() + +/* + * Declare and define the special intialize function for HTP Backend to load + */ +INIT_PKG_CORE_INIT_FUNC() + +/* op package API's */ + +Qnn_ErrorHandle_t ExampleOpPackageInit( + QnnOpPackage_GlobalInfrastructure_t infrastructure) { + if (sg_packageInitialized) + return QNN_OP_PACKAGE_ERROR_LIBRARY_ALREADY_INITIALIZED; + + /* + * op parameter order registration + * registers all defined op parameter orders in the package + * syntax: REGISTER_PACKAGE_PARAM_ORDERS() + */ + REGISTER_PACKAGE_PARAM_ORDERS() + + /* + * op axis parameter name registration + * registers all axis parameter names in the package + * used with LIST_PACKAGE_AXIS_PARAMS(...) 
+ * syntax: REGISTER_PACKAGE_AXIS_PARAMS() + */ + REGISTER_PACKAGE_AXIS_PARAMS() + + /* + * per-channel scale op name registration + * registers all per-channel scale op names in the package + * used with LIST_PACKAGE_PER_CHANNEL_QUANTIZED_OPS(...) + * syntax: REGISTER_PACKAGE_PER_CHANNEL_QUANTIZED_OPS() + */ + REGISTER_PACKAGE_PER_CHANNEL_QUANTIZED_OPS() + + sg_globalInfra = infrastructure; + sg_packageInitialized = true; + return QNN_SUCCESS; +} + +Qnn_ErrorHandle_t ExampleOpPackageGetInfo(const QnnOpPackage_Info_t** info) { + if (!sg_packageInitialized) + return QNN_OP_PACKAGE_ERROR_LIBRARY_NOT_INITIALIZED; + if (!info) + return QNN_OP_PACKAGE_ERROR_INVALID_INFO; + + sg_packageInfo = QNN_OP_PACKAGE_INFO_INIT; + sg_packageInfo.packageName = sg_packageName; + sg_packageInfo.operationNames = sg_opNames.data(); + sg_packageInfo.numOperations = sg_opNames.size(); + sg_packageInfo.sdkBuildId = QNN_SDK_BUILD_ID; + sg_packageInfo.sdkApiVersion = &sg_sdkApiVersion; + + *info = &sg_packageInfo; + return QNN_SUCCESS; +} + +Qnn_ErrorHandle_t ExampleOpPackageLogInitialize( + QnnLog_Callback_t callback, + QnnLog_Level_t maxLogLevel) { + if (sg_logInitialized) + return QNN_OP_PACKAGE_ERROR_LIBRARY_ALREADY_INITIALIZED; + if (!callback) + return QNN_LOG_ERROR_INVALID_ARGUMENT; + if (maxLogLevel < QNN_LOG_LEVEL_ERROR) + return QNN_LOG_ERROR_INVALID_ARGUMENT; + sg_logCallback = callback; + sg_maxLogLevel = maxLogLevel; + sg_logInitialized = true; + return QNN_SUCCESS; +} + +Qnn_ErrorHandle_t ExampleOpPackageLogSetLevel(QnnLog_Level_t maxLogLevel) { + if (maxLogLevel < QNN_LOG_LEVEL_ERROR) + return QNN_LOG_ERROR_INVALID_ARGUMENT; + sg_maxLogLevel = maxLogLevel; + return QNN_SUCCESS; +} + +Qnn_ErrorHandle_t ExampleOpPackageLogTerminate() { + if (!sg_logInitialized) + return QNN_OP_PACKAGE_ERROR_LIBRARY_NOT_INITIALIZED; + sg_logCallback = nullptr; + sg_maxLogLevel = (QnnLog_Level_t)0; + sg_logInitialized = false; + return QNN_SUCCESS; +} + +Qnn_ErrorHandle_t 
ExampleOpPackageValidateOpConfig(Qnn_OpConfig_t opConfig) { + if (std::string(sg_packageName) != opConfig.v1.packageName) { + return QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE; + } + + /* auto-generated validation code below + * Check if op config type matches any registered ops + * If a match is found, check number of inputs, outputs and params + */ + if (std::string(opConfig.v1.typeName) == "ExampleCustomOp") { + if (opConfig.v1.numOfParams != 0 || opConfig.v1.numOfInputs != 1 || + opConfig.v1.numOfOutputs != 1) { + return QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE; + } + } else { + return QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE; + } + + /* + * additional validation code here + * */ + + return QNN_SUCCESS; +} + +/* The following three functions in this comment are not called by HTP backend + *for now, no auto-generated implementations are created. Users should see + *example for full function signatures. (version 1.3.0) Qnn_ErrorHandle_t + *ExampleOpPackageCreateKernels (QnnOpPackage_GraphInfrastructure_t + * graphInfrastructure, QnnOpPackage_Node_t node, QnnOpPackage_Kernel_t** + *kernels, uint32_t* numKernels) (version 1.3.0) Qnn_ErrorHandle_t + *ExampleOpPackageFreeKernels (QnnOpPackage_Kernel_t* kernels) + * + * (version 1.4.0) Qnn_ErrorHandle_t ExampleOpPackageCreateOpImpl + *(QnnOpPackage_GraphInfrastructure_t graphInfrastructure, QnnOpPackage_Node_t + *node, QnnOpPackage_OpImpl_t* opImpl) (version 1.4.0) Qnn_ErrorHandle_t + *ExampleOpPackageFreeOpImpl (QnnOpPackage_OpImpl_t opImpl) + */ + +Qnn_ErrorHandle_t ExampleOpPackageTerminate() { + if (!sg_packageInitialized) + return QNN_OP_PACKAGE_ERROR_LIBRARY_NOT_INITIALIZED; + + sg_globalInfra = nullptr; + sg_packageInitialized = false; + return QNN_SUCCESS; +} + +#ifdef __cplusplus +extern "C" { +#endif + +/* latest version */ +Qnn_ErrorHandle_t ExampleOpPackageInterfaceProvider( + QnnOpPackage_Interface_t* interface) { + if (!interface) + return QNN_OP_PACKAGE_ERROR_INVALID_ARGUMENT; + interface->interfaceVersion = 
{1, 4, 0}; + interface->v1_4.init = ExampleOpPackageInit; + interface->v1_4.terminate = ExampleOpPackageTerminate; + interface->v1_4.getInfo = ExampleOpPackageGetInfo; + interface->v1_4.validateOpConfig = ExampleOpPackageValidateOpConfig; + interface->v1_4.createOpImpl = nullptr; + interface->v1_4.freeOpImpl = nullptr; + interface->v1_4.logInitialize = ExampleOpPackageLogInitialize; + interface->v1_4.logSetLevel = ExampleOpPackageLogSetLevel; + interface->v1_4.logTerminate = ExampleOpPackageLogTerminate; + return QNN_SUCCESS; +} + +#ifdef __cplusplus +} +#endif diff --git a/examples/qualcomm/custom_op/example_op_package_htp/ExampleOpPackage/src/ops/ExampleCustomOp.cpp b/examples/qualcomm/custom_op/example_op_package_htp/ExampleOpPackage/src/ops/ExampleCustomOp.cpp new file mode 100644 index 00000000000..69a1d3f1d57 --- /dev/null +++ b/examples/qualcomm/custom_op/example_op_package_htp/ExampleOpPackage/src/ops/ExampleCustomOp.cpp @@ -0,0 +1,211 @@ +//============================================================================== +// Auto Generated Code for ExampleOpPackage +//============================================================================== + +#include "HTP/core/constraints.h" +#include "HTP/core/op_package_feature_support.h" +#include "HTP/core/op_register_ext.h" +#include "HTP/core/optimize.h" +#include "HTP/core/simple_reg.h" +#include "QnnOpPackage.h" +#ifdef __hexagon__ +#include "HAP_farf.h" +#else /* __hexagon__ */ +#include +#define FARF(level, fmt, ...) printf(fmt "\n", ##__VA_ARGS__) +#endif /* __hexagon__ */ + +BEGIN_PKG_OP_DEFINITION(PKG_ExampleCustomOp); + +// op execute function declarations +template +GraphStatus examplecustomopImpl(TensorType& out_0, const TensorType& in_0); + +// forward declaration of sample cost function +static float examplecustomopCostFunc(const Op* op); + +/* + * method 1 for defining op, using default cost value (i.e. GLACIAL) and default + * flag (Flags::RESOURCE_HVX) syntax: DEF_PACKAGE_OP(F,OP) e.g. 
+ * DEF_PACKAGE_OP((examplecustomopImpl), "ExampleCustomOp") + */ +DEF_PACKAGE_OP((examplecustomopImpl), "ExampleCustomOp") + +/* + * method 2 for defining op with specified cost value (one of GLACIAL, SNAIL, + * FAST, FREE) and provided flags syntax: + * DEF_PACKAGE_OP_AND_COST_AND_FLAGS(F,OP,COST,...) can use zero or more flags, + * FLAG options are IS_CONST, INHIBIT_CONST_PROP, RESOURCE_HVX, RESOURCE_HMX(not + * supported in external op packages) e.g. + * DEF_PACKAGE_OP_AND_COST_AND_FLAGS((examplecustomopImpl), + * "ExampleCustomOp", SNAIL) + */ + +/* + * method 3 for defining op with cost function pointer and provided flags + * cost function pointer type: typedef float (*cost_function) (const Op * op); + * syntax: DEF_PACKAGE_OP_AND_COST_F_AND_FLAGS(F,OP,COST_F,...) + * e.g. + * DEF_PACKAGE_OP_AND_COST_F_AND_FLAGS((examplecustomopImpl), + * "ExampleCustomOp", examplecustomopCostFunc, Flags::RESOURCE_HVX) + */ + +/* + * optimization definitions + * need to be global in the package + * one definition per optimization + * syntax: + * DEF_PACKAGE_OPTIMIZATION(PRIORITY,MATCHCODE,CONSTRAINTCODE,REPLACECODE) + * PRIORITY predefined values include EARLY(2000), MIDDLE(3000), LATE(4000) + * HTP core provides some replacement functions for op package to use + * for more information about optimization rules, please refer to HTP core + * documentations + */ + +/* + * op parameter order definitions + * need to be global in the package + * one definition per op, and this is optional + * syntax: + * DEF_PACKAGE_PARAM_ORDER(OP,PARAM1,MANDATORY1,DEFAULT1,PARAM2,MANDATORY2,DEFAULT2...) 
+ * one or more parameters can be specified for each op + * order of parameters listed determines the order of parameters passed into op + * execution functions if an op does not have a parameter order definition, + * parameter order passed into Qnn_addNode will be passed into op execution + * functions if an op has a parameter order definition, any parameter passed + * into Qnn_addNode with unlisted name will be abandoned if two or more op + * packages with the same package name will be registered, they cannot list + * conflicting parameter orders + * PARAM refers to parameter name as a string literal + * MANDATORY refers to whether this parameter is required to be provided at + * Qnn_addNode DEFAULT is used when MANDATORY is false if provided as + * Qnn_Param_t*, DEFAULT will be used for graph construction when this parameter + * is not provided at Qnn_addNode if provided as nullptr, graph construction + * will skip this parameter when this parameter is not provided at Qnn_addNode + */ + +/* execute functions for ops */ + +template +GraphStatus examplecustomopImpl(TensorType& out_0, const TensorType& in_0) + +{ + /* + * add code here + * */ + /* + * To have good performance and stability, it is required to avoid heap memory + * allocation in this function. The heap memory allocation includes but not + * limited to calling malloc, operator new, constructing STL container objects + * like std::vector with default allocator, and adding items like calling + * std::vector::push_back to STL container objects with default allocator. + * + * Please check in SDK documentation for more information. 
+ */ + const size_t input_num_elements = in_0.total_storage_elements(); + DTypeScaleOff input_intfc = in_0.get_dtype_intfc(); + + FARF( + ALWAYS, + "[QNN ExecuTorch Op Package test] " + "input num_elem: %zu, dtype %d, scale %f, offset %d", + input_num_elements, + input_intfc.dtype, + input_intfc.scale, + input_intfc.offset); + + if (input_intfc.dtype != DType::Float32 && + input_intfc.dtype != DType::QUInt8) { + FARF( + ALWAYS, + "[QNN ExecuTorch Op Package test]" + "[Error] The datatype of input is %d, not float32(%d) nor uint8(%d)", + input_intfc.dtype, + DType::Float32, + DType::QUInt8); + return GraphStatus::ErrorPrecision; + } + + const size_t output_num_elements = out_0.total_storage_elements(); + DTypeScaleOff out_intfc = out_0.get_dtype_intfc(); + FARF( + ALWAYS, + "[QNN ExecuTorch Op Package test] " + "out num_elem: %zu, dtype %d, scale %f, offset %d", + output_num_elements, + out_intfc.dtype, + out_intfc.scale, + out_intfc.offset); + if (out_intfc.dtype != DType::Float32 && out_intfc.dtype != DType::QUInt8) { + FARF( + ALWAYS, + "[QNN ExecuTorch Op Package test]" + "[Error] The datatype of output is %d, not float32(%d) nor uint8(%d)", + out_intfc.dtype, + DType::Float32, + DType::QUInt8); + return GraphStatus::ErrorPrecision; + } + + if (input_num_elements != output_num_elements) { + FARF( + ALWAYS, + "[QNN ExecuTorch Op Package test]" + "[Error] The number of input and output doesn't match. 
" + "input_num_elements: %zu, output_num_elements: %zu", + input_num_elements, + output_num_elements); + return GraphStatus::ErrorDimensions; + } + if (input_intfc.dtype == DType::Float32) { + const float* p_input = static_cast(in_0.raw_data_const()); + float* p_output = static_cast(out_0.raw_data()); + const int multiplier = 3; + for (size_t i = 0; i < input_num_elements; ++i) { + p_output[i] = multiplier * p_input[i]; + + FARF( + ALWAYS, + "[QNN ExecuTorch Op Package test]" + "input0[%zu]=%f, multiplier=%d, output[%zu]=%f", + i, + p_input[i], + multiplier, + i, + p_output[i]); + } + } else if (input_intfc.dtype == DType::QUInt8) { + const uint8_t* p_input = static_cast(in_0.raw_data_const()); + uint8_t* p_output = static_cast(out_0.raw_data()); + const int multiplier = 3 * input_intfc.scale / out_intfc.scale; + for (size_t i = 0; i < input_num_elements; ++i) { + p_output[i] = multiplier * p_input[i]; + + FARF( + ALWAYS, + "[QNN ExecuTorch Op Package test]" + "input0[%zu]=%f, multiplier=%d, output[%zu]=%f", + i, + p_input[i], + multiplier, + i, + p_output[i]); + } + } + + return GraphStatus::Success; +} + +__attribute__((unused)) static float examplecustomopCostFunc(const Op* op) { + /* + * add code here + * */ + + float cost = 0.0; // add cost computation here + return cost; +} + +/* At the bottom of the op file, call END_PKG_OP_DEFINITION(), + where is as BEGIN_PKG_OP_DEFINITION +*/ +END_PKG_OP_DEFINITION(PKG_ExampleCustomOp); diff --git a/examples/qualcomm/utils.py b/examples/qualcomm/utils.py index b6b801e8230..f20d37c80b1 100755 --- a/examples/qualcomm/utils.py +++ b/examples/qualcomm/utils.py @@ -25,7 +25,10 @@ QnnQuantizer, QuantDtype, ) -from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset +from executorch.backends.qualcomm.serialization.qc_schema import ( + QcomChipset, + QnnExecuTorchOpPackageOptions, +) from executorch.backends.qualcomm.utils.utils import ( generate_htp_compiler_spec, generate_qnn_executorch_compiler_spec, @@ 
-298,6 +301,7 @@ def build_executorch_binary( qat_training_data=None, online_prepare=False, optrace=False, + op_package_options: QnnExecuTorchOpPackageOptions = None, ): """ A function to generate an ExecuTorch binary for Qualcomm platforms. @@ -319,6 +323,8 @@ def build_executorch_binary( qat_training_data (List[torch.Tensor], optional): A dataset for quantization aware training(QAT). Typically is a pair of tensors, such as [features, ground truth]. online_prepare (bool, optional): Compose QNN graph on device if set to True. optrace (bool, optional): Enable optrace mode for performance analysis if set to True. + op_package_options: Optional structure to specify op packages + loaded and used by the backend. Returns: None: The function writes the output to a specified .pte file. @@ -333,6 +339,7 @@ def build_executorch_binary( optrace=optrace, shared_buffer=shared_buffer, dump_intermediate_outputs=dump_intermediate_outputs, + op_package_options=op_package_options, ) if quant_dtype is not None or custom_quantizer is not None: captured_model = torch.export.export(model, inputs, strict=False).module()