rebase QNN IR PR

haowhsu-quic · haowhsu-quic · commit 8468fa73bda7 · 2025-05-01T12:23:39.000+08:00
diff --git a/backends/qualcomm/CMakeLists.txt b/backends/qualcomm/CMakeLists.txt
@@ -153,12 +153,12 @@ target_link_libraries(qnn_executorch_logging PRIVATE qnn_schema)
 target_link_libraries(qnn_profiler PRIVATE qnn_executorch_logging)
 target_link_libraries(qnn_logger PRIVATE qnn_implementation ${android_log})
 target_link_libraries(qnn_backend PRIVATE qnn_implementation qnn_logger)
-target_link_libraries(qnn_custom_protocol PRIVATE qcir_utils)
+target_link_libraries(qnn_custom_protocol PRIVATE qnn_logger)
 target_link_libraries(
   qnn_device PRIVATE qnn_executorch_logging qnn_implementation qnn_logger
 )
 target_link_libraries(
-  qnn_backend_cache PRIVATE qnn_sys_implementation qcir_utils
+  qnn_backend_cache PRIVATE qnn_sys_implementation
 )
 target_link_libraries(
   qnn_context PRIVATE qnn_implementation qnn_logger qnn_backend qnn_device
@@ -184,7 +184,7 @@ target_link_libraries(
 )
 target_link_libraries(
   qnn_executorch_backend PRIVATE qnn_executorch_header qnn_schema qnn_manager
-                                 executorch_core qcir_utils extension_tensor
+                                 executorch_core extension_tensor
 )
 set_target_properties(
   qnn_executorch_backend PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'"
@@ -243,7 +243,6 @@ if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64")
             qnn_manager
             qnn_executorch_header
             executorch
-            qcir_utils
             extension_tensor
   )
   target_link_libraries(
diff --git a/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h b/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h
@@ -6,7 +6,6 @@
  * LICENSE file in the root directory of this source tree.
  */
 #pragma once
-#include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
 #include <executorch/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h>
 #include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
 #include <executorch/backends/qualcomm/runtime/Logging.h>
diff --git a/backends/qualcomm/runtime/QnnManager.cpp b/backends/qualcomm/runtime/QnnManager.cpp
@@ -6,7 +6,6 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
 #include <executorch/backends/qualcomm/runtime/QnnManager.h>
 #include <executorch/backends/qualcomm/runtime/SharedBuffer.h>
 #include <executorch/backends/qualcomm/runtime/Utils.h>
@@ -572,126 +571,6 @@ Error QnnManager::CompileDlc() {
   return Error::Ok;
 }
 
-Error QnnManager::CompileQcir() {
-  QnnQcirCustomProtocol qnn_qcir_custom_protocol;
-  auto [status, qcir_fbs_size, tensor_size, qcir_fbs_ptr, tensor_ptr] =
-      qnn_qcir_custom_protocol.DeserializeQcirCustomBuffer(
-          qnn_context_blob_.buffer);
-
-  if (status != Error::Ok) {
-    QNN_EXECUTORCH_LOG_ERROR("Failed to verify QnnQcirCustomProtocol");
-    return Error::Internal;
-  }
-
-  auto context = qcir::GetContext(qcir_fbs_ptr);
-  for (const auto& graph : *context->graphs()) {
-    // qcir tensors to TensorWrapper
-    std::vector<std::shared_ptr<TensorWrapper>> graph_inputs, graph_outputs,
-        tensors;
-    for (const auto& tensor : *graph->tensors()) {
-      tensors.emplace_back(CreateTensorWrapper(ToTensor(
-          tensor, static_cast<uint8_t*>(tensor_ptr) + tensor->offset())));
-      if (tensor->type() == qcir::TensorType::WRITE) {
-        graph_inputs.push_back(tensors.back());
-      } else if (tensor->type() == qcir::TensorType::READ) {
-        graph_outputs.push_back(tensors.back());
-      }
-    }
-    std::vector<std::shared_ptr<OpWrapper>> op_wrappers;
-    // qcir graph node to OpWrapper
-    for (const auto& node : *graph->nodes()) {
-      std::shared_ptr<OpWrapper> op = std::make_shared<OpWrapper>(
-          node->name()->str(),
-          node->package_name()->str(),
-          node->type_name()->str());
-
-      // qcir input tensors to OpWrapper input tensors
-      std::vector<std::shared_ptr<TensorWrapper>> inputs;
-      for (uint32_t index : *node->inputs()) {
-        inputs.push_back(tensors[index]);
-      }
-      op->AddInputTensors(inputs);
-
-      // qcir output tensors to OpWrapper output tensors
-      std::vector<std::shared_ptr<TensorWrapper>> outputs;
-      for (uint32_t index : *node->outputs()) {
-        outputs.push_back(tensors[index]);
-      }
-      op->AddOutputTensors(outputs);
-
-      // qcir operator param to OpWrapper param
-      for (uint32_t index : *node->params()) {
-        const auto& tensor = graph->tensors()->Get(index);
-        std::string name = tensor->name()->str();
-        Qnn_DataType_t dtype = ToDataType(tensor->dtype());
-        const uint8_t* data_ptr =
-            static_cast<uint8_t*>(tensor_ptr) + tensor->offset();
-        if (tensor->shape()->size() != 0) {
-          // add tensor param
-          op->AddTensorParam(
-              name,
-              dtype,
-              tensor->shape()->size(),
-              tensor->shape()->data(),
-              data_ptr);
-        } else {
-          // add scalar param
-          switch (dtype) {
-            case Qnn_DataType_t::QNN_DATATYPE_INT_32:
-              op->AddScalarParam(
-                  name, dtype, *reinterpret_cast<const int32_t*>(data_ptr));
-              break;
-            case Qnn_DataType_t::QNN_DATATYPE_INT_16:
-              op->AddScalarParam(
-                  name, dtype, *reinterpret_cast<const int16_t*>(data_ptr));
-              break;
-            case Qnn_DataType_t::QNN_DATATYPE_INT_8:
-              op->AddScalarParam(name, dtype, static_cast<int8_t>(*data_ptr));
-              break;
-            case Qnn_DataType_t::QNN_DATATYPE_UINT_32:
-              op->AddScalarParam(
-                  name, dtype, *reinterpret_cast<const uint32_t*>(data_ptr));
-              break;
-            case Qnn_DataType_t::QNN_DATATYPE_UINT_16:
-              op->AddScalarParam(
-                  name, dtype, *reinterpret_cast<const uint16_t*>(data_ptr));
-              break;
-            case Qnn_DataType_t::QNN_DATATYPE_UINT_8:
-              op->AddScalarParam(name, dtype, *data_ptr);
-              break;
-            case Qnn_DataType_t::QNN_DATATYPE_FLOAT_32:
-            case Qnn_DataType_t::QNN_DATATYPE_FLOAT_16:
-              op->AddScalarParam(
-                  name, dtype, *reinterpret_cast<const float*>(data_ptr));
-              break;
-            case Qnn_DataType_t::QNN_DATATYPE_BOOL_8:
-              op->AddScalarParam(name, dtype, *data_ptr);
-              break;
-            default:
-              QNN_EXECUTORCH_LOG_ERROR(
-                  "Invalid scalar type: %s", tensor->name()->c_str());
-              break;
-          }
-        }
-      }
-      op_wrappers.emplace_back(std::move(op));
-    }
-    ET_CHECK_OR_RETURN_ERROR(
-        Compile(graph->name()->str(), op_wrappers) == Error::Ok,
-        Internal,
-        "Fail to compile graph from qcir with graph_name: %s",
-        graph->name()->str().c_str());
-    ET_CHECK_OR_RETURN_ERROR(
-        AllocateTensor(graph->name()->str(), graph_inputs, graph_outputs) ==
-            Error::Ok,
-        Internal,
-        "Fail to allocate tensor for qcir with graph_name: %s",
-        graph->name()->str().c_str());
-  }
-
-  return Error::Ok;
-}
-
 Error QnnManager::Compile(
     const std::string& graph_name,
     std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
diff --git a/backends/qualcomm/runtime/QnnManager.h b/backends/qualcomm/runtime/QnnManager.h
@@ -67,7 +67,6 @@ class QnnManager {
   executorch::runtime::Error GetContextBinary(
       QnnExecuTorchContextBinary& qnn_executorch_context_binary);
 
-  executorch::runtime::Error CompileQcir();
   executorch::runtime::Error CompileDlc();
   executorch::runtime::Error Compile(
       const std::string& graph_name,
diff --git a/backends/qualcomm/runtime/backends/QnnBackendCache.cpp b/backends/qualcomm/runtime/backends/QnnBackendCache.cpp
@@ -6,7 +6,6 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnBackendCache.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnCustomProtocol.h>
 namespace executorch {
@@ -129,18 +128,6 @@ Error QnnBackendCache::Configure(const std::vector<std::string>& graph_names) {
       qnn_context_blob_.nbytes);
 
   if (status == Error::Internal) {
-    auto [status, qcir_fbs_size, _, qcir_fbs_ptr, __] =
-        QnnQcirCustomProtocol().DeserializeQcirCustomBuffer(
-            qnn_context_blob_.buffer);
-    if (status == Error::Ok) {
-      // first stage of multi graph
-      state_ = MULTI_GRAPH;
-      auto context = qcir::GetContext(qcir_fbs_ptr);
-      for (const auto& graph : *context->graphs()) {
-        graph_names_.emplace_back(graph->name()->str());
-      }
-      return Error::Ok;
-    }
     // online prepare
     state_ = ONLINE_PREPARE;
   }
diff --git a/backends/qualcomm/runtime/backends/QnnCustomProtocol.cpp b/backends/qualcomm/runtime/backends/QnnCustomProtocol.cpp
@@ -6,13 +6,13 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnCustomProtocol.h>
 
 namespace executorch {
 namespace backends {
 namespace qnn {
 
+// BuildQcirCustomBuffer is still needed for on-device op validation of other backends.
 void QnnQcirCustomProtocol::BuildQcirCustomBuffer(
     const QnnExecuTorchContextBinary& qcir_binary,
     const std::vector<uint8_t>& tensor_data) {
diff --git a/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp b/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp
@@ -41,8 +41,7 @@ Error QnnDlcManager::Create() {
       std::make_unique<QnnDevice>(qnn_loaded_backend_, logger_.get());
 
   backend_params_ptr_->qnn_backend_cache_ptr_ =
-      std::make_unique<QnnBackendCache>(
-          qnn_context_blob_, options_->graph_name()->str());
+      std::make_unique<QnnBackendCache>(qnn_context_blob_);
 
   backend_params_ptr_->qnn_context_ptr_ = std::make_unique<IrContext>(
       qnn_loaded_backend_,
@@ -64,8 +63,13 @@ Error QnnDlcManager::Create() {
 Error QnnDlcManager::Configure() {
   ET_CHECK_OR_RETURN_ERROR(
       backend_params_ptr_ != nullptr, Internal, "Failed to load Qnn backend.");
+  std::vector<std::string> graph_names;
+  for (auto name : *options_->graph_name()) {
+    graph_names.emplace_back(name->str());
+  }
   ET_CHECK_OR_RETURN_ERROR(
-      backend_params_ptr_->qnn_backend_cache_ptr_->Configure() == Error::Ok,
+      backend_params_ptr_->qnn_backend_cache_ptr_->Configure(graph_names) ==
+          Error::Ok,
       Internal,
       "Fail to configure Qnn backend cache");
   ET_CHECK_OR_RETURN_ERROR(
diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py
@@ -2535,6 +2535,9 @@ def test_qnn_backend_shared_buffer(self):
         )
 
     def test_qnn_backend_online_prepare(self):
+        if self.enable_x86_64:
+            self.skipTest("TODO: add online_prepare support on host platform")
+
         backend_options = generate_htp_compiler_spec(use_fp16=True)
         TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
             soc_model=self.chipset_table[TestQNN.model],
@@ -3187,6 +3190,9 @@ def test_qnn_backend_shared_buffer(self):
         )
 
     def test_qnn_backend_online_prepare(self):
+        if self.enable_x86_64:
+            self.skipTest("TODO: add online_prepare support on host platform")
+
         backend_options = generate_htp_compiler_spec(use_fp16=False)
         TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
             soc_model=self.chipset_table[TestQNN.model],