Skip to content

Commit 8e200b7

Browse files
committed
enable 4-bit embedding on llama.py
1 parent 79be866 commit 8e200b7

File tree

8 files changed

+1097
-11
lines changed

8 files changed

+1097
-11
lines changed

backends/qualcomm/partition/qnn_partitioner.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,13 @@ def is_node_supported(self, _, node: torch.fx.Node) -> bool:
111111
return supported
112112

113113
def __del__(self):
114-
self.qnn_manager.Destroy()
114+
# HTP op package contains some static data structures
115+
# which will trigger preparation failure in qnn_preprocess
116+
# if libQnnHtp.so is not fully unloaded
117+
# ---
118+
# currently we'll just keep manager alive for simplicity
119+
#self.qnn_manager.Destroy()
120+
pass
115121

116122

117123
class QnnPartitioner(Partitioner):
@@ -179,7 +185,12 @@ def partition(self, edge_program: torch.export.ExportedProgram) -> PartitionResu
179185
# pop certain keys in meta for not affecting the passes in compilation
180186
# TODO: need to put property name in common definitions
181187
node.meta.pop(QCOM_AXIS_ORDER, "")
182-
del self.op_support_checker
188+
# HTP op package contains some static data structures
189+
# which will trigger preparation failure in qnn_preprocess
190+
# if libQnnHtp.so is not fully unloaded
191+
# ---
192+
# currently we'll just keep manager alive for simplicity
193+
#del self.op_support_checker
183194
return PartitionResult(
184195
tagged_exported_program=edge_program, partition_tags=self.partition_tags
185196
)

backends/qualcomm/quantizer/custom_annotation.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from executorch.backends.qualcomm.quantizer.annotators import QUANT_ANNOTATION_KEY
1010
from executorch.backends.qualcomm.quantizer.quantizer import (
1111
get_16a8w_qnn_ptq_config,
12+
get_16a4w_qnn_ptq_config,
1213
get_8a8w_qnn_ptq_config,
1314
get_ptq_per_channel_quant_config,
1415
QuantizationConfig,
@@ -53,6 +54,34 @@ def annotate_conv2d(node: Node, quantization_config: QuantizationConfig) -> None
5354
)
5455

5556

57+
def annotate_linear_16a4w_in_affine_layer(gm: torch.fx.GraphModule) -> None:
58+
def annotate_conv2d(node: Node, quantization_config: QuantizationConfig) -> None:
59+
input_qspec_map = {}
60+
input_act = node.args[0]
61+
input_spec = quantization_config.input_activation
62+
input_qspec_map[input_act] = input_spec
63+
64+
weight = node.args[1]
65+
input_qspec_map[weight] = quantization_config.weight
66+
67+
node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation(
68+
input_qspec_map=input_qspec_map,
69+
output_qspec=quantization_config.output_activation,
70+
_annotated=True,
71+
)
72+
73+
quantization_config_16a4w = get_16a4w_qnn_ptq_config(act_observer=MinMaxObserver)
74+
for node in gm.graph.nodes:
75+
if node.op == "call_function" and node.target == torch.ops.aten.conv2d.default:
76+
if "nn_module_stack" in node.meta:
77+
module_values_list = list(node.meta["nn_module_stack"].values())
78+
full_qualified_name = module_values_list[-1][0]
79+
if full_qualified_name == "output.conv":
80+
annotate_conv2d(
81+
node, quantization_config=quantization_config_16a4w
82+
)
83+
84+
5685
def annotate_prefill_kv_output(gm: torch.fx.GraphModule, kv_quant_attrs: dict):
5786
for node in gm.graph.nodes:
5887
if node.op == "output":

examples/qualcomm/oss_scripts/llama/custom_ops/embedding/Makefile

Lines changed: 364 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!--
3+
Copyright (c) Qualcomm Innovation Center, Inc.
4+
All rights reserved
5+
6+
This source code is licensed under the BSD-style license found in the
7+
LICENSE file in the root directory of this source tree.
8+
-->
9+
<OpDefCollection
10+
PackageName="EmbeddingOpPackage"
11+
Domain="executorch"
12+
Version="1.0"
13+
>
14+
<OpDefList>
15+
<OpDef>
16+
<Name>Embedding</Name>
17+
<Description>
18+
<Content>implementation of torch.nn.Embedding</Content>
19+
</Description>
20+
21+
<Reference Source="PyTorch Documentation"
22+
Url="torch.nn.Embedding &lt;https://pytorch.org/docs/stable/generated/torch.nn.Embedding.html&gt;"/>
23+
24+
<Input>
25+
<Name>input</Name>
26+
<Description>
27+
<Content>data table</Content>
28+
</Description>
29+
<Mandatory>true</Mandatory>
30+
<Datatype>BACKEND_SPECIFIC</Datatype>
31+
<Shape>
32+
<Rank>2D</Rank>
33+
<Text>a tensor of 2 dimension</Text>
34+
</Shape>
35+
</Input>
36+
37+
<Input>
38+
<Name>indices</Name>
39+
<Description>
40+
<Content>indices to extract data</Content>
41+
</Description>
42+
<Mandatory>true</Mandatory>
43+
<Datatype>QNN_DATATYPE_INT_32</Datatype>
44+
<Shape>
45+
<Rank>ND</Rank>
46+
<Text>a tensor of N dimension</Text>
47+
</Shape>
48+
</Input>
49+
50+
<Output>
51+
<Name>output</Name>
52+
<Description>
53+
<Content>output activation</Content>
54+
</Description>
55+
<Mandatory>true</Mandatory>
56+
<Datatype>BACKEND_SPECIFIC</Datatype>
57+
<Shape>
58+
<Rank>ND</Rank>
59+
<Text>a tensor of N dimension</Text>
60+
</Shape>
61+
</Output>
62+
63+
<!--This Op is implemented on these Backends-->
64+
<SupportedBackend>HTP</SupportedBackend>
65+
</OpDef>
66+
67+
</OpDefList>
68+
69+
<SupplementalOpDefList Backend="HTP">
70+
<SupportedOps>
71+
<OpName>Embedding</OpName>
72+
</SupportedOps>
73+
74+
<!--Embedding-->
75+
<SupplementalOpDef>
76+
<Name>Embedding</Name>
77+
78+
<Input>
79+
<Name>input</Name>
80+
<Datatype>QNN_DATATYPE_SFIXED_POINT_8</Datatype>
81+
</Input>
82+
83+
<Output>
84+
<Name>output</Name>
85+
<Datatype>QNN_DATATYPE_SFIXED_POINT_8</Datatype>
86+
</Output>
87+
</SupplementalOpDef>
88+
</SupplementalOpDefList>
89+
90+
</OpDefCollection>
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Copyright (c) Qualcomm Innovation Center, Inc.
2+
# All rights reserved
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
import torch
8+
from torch.library import impl, Library
9+
10+
op_lib = Library("qaisw", "DEF")
11+
op_lib.define("embedding(Tensor table, Tensor indices) -> Tensor")
12+
13+
@impl(op_lib, "embedding", dispatch_key="CompositeExplicitAutograd")
14+
def embedding_impl(table: torch.Tensor, indices: torch.Tensor) -> torch.Tensor:
15+
return table[indices]
16+
17+
18+
class CustomEmbedding(torch.nn.Module):
19+
def __init__(self, weight):
20+
super(CustomEmbedding, self).__init__()
21+
self.weight = weight
22+
23+
def forward(self, indices):
24+
return torch.ops.qaisw.embedding.default(self.weight, indices)
25+
26+
27+
def custom_embedding_annotation(gm: torch.fx.GraphModule) -> None:
28+
import itertools
29+
from executorch.backends.qualcomm.quantizer.annotators import (
30+
_is_annotated,
31+
QUANT_ANNOTATION_KEY,
32+
)
33+
from executorch.backends.qualcomm.quantizer.qconfig import (
34+
get_16a4w_qnn_ptq_config,
35+
)
36+
from torch.ao.quantization.quantize_pt2e import QuantizationAnnotation, SharedQuantizationSpec
37+
from torch.fx import Node
38+
from torch.fx.passes.utils.source_matcher_utils import get_source_partitions
39+
40+
custom_partitions = get_source_partitions(gm.graph, [torch.ops.qaisw.embedding.default])
41+
custom_partitions = list(itertools.chain(*custom_partitions.values()))
42+
quantization_config = get_16a4w_qnn_ptq_config()
43+
for custom_partition in custom_partitions:
44+
if len(custom_partition.output_nodes) > 1:
45+
raise ValueError("custom partition has more than one output node")
46+
custom_node = custom_partition.output_nodes[0]
47+
if (
48+
custom_node.op != "call_function"
49+
or custom_node.target != torch.ops.qaisw.embedding.default
50+
):
51+
raise ValueError(f"{custom_node} is not a custom operator")
52+
# skip annotation if it is already annotated
53+
if _is_annotated([custom_node]):
54+
continue
55+
56+
input_qspec_map = {}
57+
input_act = custom_node.args[0]
58+
assert isinstance(input_act, Node)
59+
input_spec = quantization_config.weight
60+
input_qspec_map[input_act] = input_spec
61+
62+
custom_node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation(
63+
input_qspec_map=input_qspec_map,
64+
output_qspec=SharedQuantizationSpec((input_act, custom_node)),
65+
_annotated=True,
66+
)

0 commit comments

Comments
 (0)