135 changes: 71 additions & 64 deletions runtime/executor/memory_manager.h
@@ -14,88 +14,95 @@
 namespace torch {
 namespace executor {
 
-// The memory manager for the executor. It is responsible for keeping
-// track of the memory allocation across the lifespan of the executor,
-// providing allocators for the following types of objects:
-//
-// 1. Constants - Constant values in the program. TODO(myuan): we may not
-// need it (because the constants are hardcoded in the flatbuffer) but
-// we'll account for it for the time being for completeness.
-//
-// 2. Non-constants - Non-constant values in the program, which may or may not
-// be tied to a memory plan.
-//
-// 3. Runtime structures - Any data needed by the executor itself.
-// TODO(myuan): determine whether Delegates need to receive it in the "init"
-// method for backends to use directly, or whether memory needs will be
-// expressed as an argument to the delegated methods for memory planning to
-// account for. Same concerns about dynamic behaviour apply.
-//
-// 4. Kernel temporary - This is to provide kernels with a way to create memory,
-// without having to request it by adding an extra argument. The extra argument
-// approach is fine if/when planning desires to account for such memory, but in
-// certain cases a kernel may be fine just leaving this as an implementation
-// detail of the kernels itself (but we still want to be able to capture such
-// memory allocation).
-//
-// In general, this memory manager aims to consolidate all dynamic memory needs
-// for program execution. This can allow for heap-less execution (relevant to
-// some embedded scenarios), and overall have a tighter control over memory
-// utilization. The manager, however, cannot ensure all allocation is accounted
-// for since kernel implementations are free to use a separate way to allocate
-// memory (e.g. for things like scratch space).
-// TODO(myuan): analyze the stack data overhead and lifespan.
-
-class MemoryManager {
+/**
+ * A container class for allocators used during Method load and execution.
+ *
+ * This class consolidates all dynamic memory needs for Method load and
+ * execution. This can allow for heap-based as well as heap-less execution
+ * (relevant to some embedded scenarios), and overall provides more control over
+ * memory use.
+ *
+ * This class, however, cannot ensure all allocation is accounted for since
+ * kernel and backend implementations are free to use a separate way to allocate
+ * memory (e.g., for things like scratch space). But we do suggest that backends
+ * and kernels use these provided allocators whenever possible.
+ */
+class MemoryManager final {
  public:
-  MemoryManager(
-      MemoryAllocator* constant_allocator,
-      HierarchicalAllocator* non_constant_allocator,
-      MemoryAllocator* runtime_allocator,
-      MemoryAllocator* kernel_temporary_allocator)
-      : constant_allocator_(constant_allocator),
-        non_constant_allocator_(non_constant_allocator),
-        runtime_allocator_(runtime_allocator),
-        kernel_temporary_allocator_(kernel_temporary_allocator) {}
+  /**
+   * Constructs a new MemoryManager.
+   *
+   * @param[in] method_allocator The allocator to use when loading a Method and
+   *     allocating its internal structures. Must outlive the Method that uses
+   *     it.
+   * @param[in] planned_memory The memory-planned buffers to use for mutable
+   *     tensor data when executing a Method. Must outlive the Method that uses
+   *     it. May be `nullptr` if the Method does not use any memory-planned
+   *     tensor data. The sizes of the buffers in this HierarchicalAllocator
+   *     must agree with the corresponding
+   *     `MethodMeta::num_memory_planned_buffers()` and
+   *     `MethodMeta::memory_planned_buffer_size(N)` values, which are embedded
+   *     in the Program.
+   * @param[in] temp_allocator The allocator to use when allocating temporary
+   *     data during kernel or delegate execution. Must outlive the Method that
+   *     uses it. May be `nullptr` if the Method does not use kernels or
+   *     delegates that allocate temporary data. This allocator will be reset
+   *     after every kernel or delegate call during execution.
+   */
+  explicit MemoryManager(
+      MemoryAllocator* method_allocator,
+      HierarchicalAllocator* planned_memory = nullptr,
+      MemoryAllocator* temp_allocator = nullptr)
+      : method_allocator_(method_allocator),
+        planned_memory_(planned_memory),
+        temp_allocator_(temp_allocator) {}
 
   /**
-   * Returns an allocator for constant values in the program.
+   * DEPRECATED: Use the constructor without `constant_allocator` instead.
+   *
+   * TODO(T162089316): Remove this once all users migrate to the new ctor.
    */
-  const MemoryAllocator* get_constant_allocator() const {
-    return constant_allocator_;
-  }
+  __ET_DEPRECATED MemoryManager(
+      __ET_UNUSED MemoryAllocator* constant_allocator,
+      HierarchicalAllocator* non_constant_allocator,
+      MemoryAllocator* runtime_allocator,
+      MemoryAllocator* kernel_temporary_allocator)
+      : MemoryManager(
+            /*method_allocator=*/runtime_allocator,
+            /*planned_memory=*/non_constant_allocator,
+            /*temp_allocator=*/kernel_temporary_allocator) {}
 
   /**
-   * Returns an hierarchical allocator for non-constant values in the program.
+   * Returns the allocator that the runtime will use to allocate internal
+   * structures while loading a Method. Must not be used after its associated
+   * Method has been loaded.
    */
-  HierarchicalAllocator* get_non_constant_allocator() const {
-    return non_constant_allocator_;
+  MemoryAllocator* method_allocator() const {
+    return method_allocator_;
   }
 
   /**
-   * Returns an allocator to be used for any runtime internal structures
-   * (i.e. not directly program values).
+   * Returns the memory-planned buffers to use for mutable tensor data.
    */
-  MemoryAllocator* get_runtime_allocator() const {
-    return runtime_allocator_;
+  HierarchicalAllocator* planned_memory() const {
+    return planned_memory_;
   }
 
   /**
-   * Returns an allocator that kernel implementations can use to
-   * create temporary memory (i.e. whose lifespan is a single execution
-   * of the kernel).
+   * Returns the allocator to use to allocate temporary data during kernel or
+   * delegate execution.
+   *
+   * This allocator will be reset after every kernel or delegate call during
+   * execution.
    */
-  MemoryAllocator* get_kernel_temporary_allocator() const {
-    return kernel_temporary_allocator_;
+  MemoryAllocator* temp_allocator() const {
+    return temp_allocator_;
  }
 
-  virtual ~MemoryManager() {}
-
  private:
-  const MemoryAllocator* constant_allocator_;
-  HierarchicalAllocator* non_constant_allocator_;
-  MemoryAllocator* runtime_allocator_;
-  MemoryAllocator* kernel_temporary_allocator_;
+  MemoryAllocator* method_allocator_;
+  HierarchicalAllocator* planned_memory_;
+  MemoryAllocator* temp_allocator_;
 };
 
 } // namespace executor
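For illustration, a minimal usage sketch of the new three-allocator API, not taken from this diff; the pool sizes and variable names are illustrative. Calls to the deprecated four-argument constructor map onto this as runtime_allocator -> method_allocator, non_constant_allocator -> planned_memory, and kernel_temporary_allocator -> temp_allocator, with constant_allocator dropped.

#include <executorch/runtime/executor/memory_manager.h>

using namespace torch::executor;

// Statically allocated pools; the sizes here are arbitrary placeholders.
static uint8_t method_pool[4096];
static uint8_t temp_pool[1024];

MemoryAllocator method_allocator(sizeof(method_pool), method_pool);
MemoryAllocator temp_allocator(sizeof(temp_pool), temp_pool);

// planned_memory may be nullptr when the Method has no memory-planned
// tensor data.
MemoryManager memory_manager(
    &method_allocator, /*planned_memory=*/nullptr, &temp_allocator);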
35 changes: 18 additions & 17 deletions runtime/executor/method.cpp
@@ -44,7 +44,8 @@ class BackendDelegate final {
    *
    * @param[in] delegate The serialized backend delegate to load.
    * @param[in] program The serialized program to load from.
-   * @param[in] runtime_allocator Allocator for creating runtime C++ objects.
+   * @param[in] backend_init_context The context pointer to pass to the
+   *     backend's init() method.
    * @param[out] out The BackendDelegate to initialize.
    *
    * @returns Error::Ok if the initialization succeeded, or an error otherwise.
@@ -212,12 +213,12 @@ struct Chain {
 namespace {
 
 Result<InstructionArgs> gen_instruction_arguments(
-    MemoryAllocator* runtime_allocator,
+    MemoryAllocator* method_allocator,
     EValue* values,
     size_t num_args,
     const int32_t* arg_idxs) {
   EValue** arg_list =
-      ET_ALLOCATE_LIST_OR_RETURN_ERROR(runtime_allocator, EValue*, num_args);
+      ET_ALLOCATE_LIST_OR_RETURN_ERROR(method_allocator, EValue*, num_args);
   for (size_t i = 0; i < num_args; ++i) {
     arg_list[i] = &values[arg_idxs[i]];
   }
@@ -267,7 +268,7 @@ Error Method::parse_values() {
   ET_CHECK(flatbuffer_values != nullptr);
   size_t n_value = flatbuffer_values->size();
   values_ = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
-      memory_manager_->get_runtime_allocator(), EValue, n_value);
+      memory_manager_->method_allocator(), EValue, n_value);
 
   // n_value_ counts the number of successfully-initialized values for ~Method()
   // to clean up, and is incremented at the bottom of the loop. This makes it
@@ -299,10 +300,10 @@
       // Allocate space for boxed and unboxed list representations using
       // values_ as source of truth
       auto* evalp_list =
-          memory_manager_->get_runtime_allocator()->allocateList<EValue*>(
+          memory_manager_->method_allocator()->allocateList<EValue*>(
               items->size());
       auto* int_list =
-          memory_manager_->get_runtime_allocator()->allocateList<int64_t>(
+          memory_manager_->method_allocator()->allocateList<int64_t>(
               items->size());
 
       // initialize boxed list
@@ -452,9 +453,9 @@ Error Method::resolve_operator(
   populateOperatorName(op, kTempBufferSizeForName, operator_name);
 
   // resolve tensor meta
-  auto runtime_allocator = memory_manager_->get_runtime_allocator();
+  auto method_allocator = memory_manager_->method_allocator();
   TensorMeta* meta =
-      ET_ALLOCATE_LIST_OR_RETURN_ERROR(runtime_allocator, TensorMeta, n_args);
+      ET_ALLOCATE_LIST_OR_RETURN_ERROR(method_allocator, TensorMeta, n_args);
   size_t count = 0;
   for (size_t i = 0; i < n_args; i++) {
     EValue* eval = args[i];
@@ -463,7 +464,7 @@
       auto tensor = eval->toTensor();
       meta[count].dtype_ = tensor.scalar_type();
       exec_aten::DimOrderType* dim_order_ptr = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
-          runtime_allocator, exec_aten::DimOrderType, tensor.dim());
+          method_allocator, exec_aten::DimOrderType, tensor.dim());
       size_t size = tensor.dim();
       Error err = get_dim_order(tensor, dim_order_ptr, size);
       ET_CHECK_OR_RETURN_ERROR(
@@ -514,7 +515,7 @@ Error Method::init(executorch_flatbuffer::ExecutionPlan* s_plan) {
   init_state_ =
       InitializationState::InitializationFailed; // Until proven otherwise
   serialization_plan_ = s_plan;
-  auto runtime_allocator = memory_manager_->get_runtime_allocator();
+  auto method_allocator = memory_manager_->method_allocator();
 
   {
     // Parse the elements of the values_ array.
@@ -530,7 +531,7 @@
     ET_CHECK(delegates != nullptr);
     size_t n_delegate = delegates->size();
     delegates_ = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
-        runtime_allocator, BackendDelegate, n_delegate);
+        method_allocator, BackendDelegate, n_delegate);
 
     // n_delegate_ counts the number of successfully-initialized delegates for
     // ~Method() to clean up, and is incremented at the bottom of the loop. This
@@ -539,7 +540,7 @@

     for (size_t i = 0; i < n_delegate; ++i) {
       const auto& delegate = *delegates->Get(i);
-      BackendInitContext backend_init_context(runtime_allocator);
+      BackendInitContext backend_init_context(method_allocator);
       Error err = BackendDelegate::Init(
           delegate, program_, backend_init_context, &delegates_[i]);
       if (err != Error::Ok) {
@@ -559,15 +560,15 @@
     n_chains_ = chains->size();
 
     chains_ =
-        ET_ALLOCATE_LIST_OR_RETURN_ERROR(runtime_allocator, Chain, n_chains_);
+        ET_ALLOCATE_LIST_OR_RETURN_ERROR(method_allocator, Chain, n_chains_);
     int32_t num_instructions_missing_op = 0;
     for (size_t i = 0; i < n_chains_; ++i) {
       auto s_chain = chains->Get(i);
       auto num_instructions = s_chain->instructions()->size();
       auto chain_instruction_kernels = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
-          runtime_allocator, OpFunction, num_instructions);
+          method_allocator, OpFunction, num_instructions);
       auto chain_instruction_arg_lists = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
-          runtime_allocator, InstructionArgs, num_instructions);
+          method_allocator, InstructionArgs, num_instructions);
 
       // Set up the argument lists ahead of time and store pointers to them to
       // use when the instructions are called
@@ -579,7 +580,7 @@
           const auto arg_idxs =
               instruction->instr_args_as_KernelCall()->args();
           auto res = gen_instruction_arguments(
-              runtime_allocator, values_, arg_idxs->size(), arg_idxs->data());
+              method_allocator, values_, arg_idxs->size(), arg_idxs->data());
           if (!res.ok()) {
             return res.error();
           }
@@ -600,7 +601,7 @@
           const auto arg_idxs =
               instruction->instr_args_as_DelegateCall()->args();
           auto res = gen_instruction_arguments(
-              runtime_allocator, values_, arg_idxs->size(), arg_idxs->data());
+              method_allocator, values_, arg_idxs->size(), arg_idxs->data());
           if (!res.ok()) {
             return res.error();
           }
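The renames above cover load-time allocation; at execution time, kernels and delegates draw on the temp allocator instead. As a hedged sketch of that pattern (the helper below is hypothetical, not an API in this diff), a caller could take scratch memory from the temp allocator and rely on the runtime resetting it after each kernel or delegate call:

// Hypothetical helper, for illustration only.
Error run_kernel_with_scratch(MemoryManager* memory_manager, size_t nbytes) {
  MemoryAllocator* temp = memory_manager->temp_allocator();
  if (temp == nullptr) {
    return Error::InvalidState; // no temp allocator was provided
  }
  void* scratch = temp->allocate(nbytes);
  if (scratch == nullptr) {
    return Error::MemoryAllocationFailed;
  }
  // ... use `scratch` for the duration of this call only; the runtime resets
  // the temp allocator after every kernel or delegate call, so there is no
  // explicit free.
  return Error::Ok;
}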
4 changes: 2 additions & 2 deletions runtime/executor/method_meta.cpp
@@ -169,14 +169,14 @@ Result<TensorInfo> MethodMeta::output_tensor_meta(size_t index) const {
       static_cast<exec_aten::ScalarType>(tensor_value->scalar_type()));
 }
 
-size_t MethodMeta::num_non_const_buffers() const {
+size_t MethodMeta::num_memory_planned_buffers() const {
   // Index zero is reserved internally, and we hide it from users. The actual
   // number of buffers is one fewer than the actual size of this list in the
   // program.
   return s_plan_->non_const_buffer_sizes()->size() - 1;
 }
 
-Result<int64_t> MethodMeta::non_const_buffer_size(size_t index) const {
+Result<int64_t> MethodMeta::memory_planned_buffer_size(size_t index) const {
   auto num_buffers = this->num_non_const_buffers();
   ET_CHECK_OR_RETURN_ERROR(
       index >= 0 && index < num_buffers,
24 changes: 19 additions & 5 deletions runtime/executor/method_meta.h
@@ -162,19 +162,33 @@
   Result<TensorInfo> output_tensor_meta(size_t index) const;
 
   /**
-   * Get the number of non-constant buffers this method requires.
+   * Get the number of memory-planned buffers this method requires.
    *
-   * @returns The number of non-constant buffers.
+   * @returns The number of memory-planned buffers.
    */
-  size_t num_non_const_buffers() const;
+  size_t num_memory_planned_buffers() const;
 
   /**
-   * Get the size in bytes of the specified non-constant buffer.
+   * Get the size in bytes of the specified memory-planned buffer.
    *
    * @param[in] index The index of the buffer to look up.
    * @returns The size in bytes on success, or an error on failure.
    */
-  Result<int64_t> non_const_buffer_size(size_t index) const;
+  Result<int64_t> memory_planned_buffer_size(size_t index) const;
+
+  /**
+   * DEPRECATED: Use num_memory_planned_buffers() instead.
+   */
+  __ET_DEPRECATED size_t num_non_const_buffers() const {
+    return num_memory_planned_buffers();
+  }
+
+  /**
+   * DEPRECATED: Use memory_planned_buffer_size() instead.
+   */
+  __ET_DEPRECATED Result<int64_t> non_const_buffer_size(size_t index) const {
+    return memory_planned_buffer_size(index);
+  }
 
  private:
   // Let Program create MethodMeta.
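For illustration, a hedged sketch of the intended call pattern, not part of this diff: size the planned buffers from MethodMeta, wrap them in a HierarchicalAllocator, and hand the result to MemoryManager. This assumes a loaded `program` exposing method_meta(), a method named "forward", and a HierarchicalAllocator constructible from a span of buffers; ownership and error handling are abbreviated.

#include <memory>
#include <vector>

using namespace torch::executor;

Result<MethodMeta> meta = program.method_meta("forward");
ET_CHECK(meta.ok());

std::vector<std::unique_ptr<uint8_t[]>> owned_buffers;
std::vector<Span<uint8_t>> planned_spans;
const size_t n = meta->num_memory_planned_buffers();
for (size_t i = 0; i < n; ++i) {
  // Buffer sizes are embedded in the Program's memory plan.
  const size_t nbytes =
      static_cast<size_t>(meta->memory_planned_buffer_size(i).get());
  owned_buffers.push_back(std::make_unique<uint8_t[]>(nbytes));
  planned_spans.push_back({owned_buffers.back().get(), nbytes});
}
HierarchicalAllocator planned_memory(
    {planned_spans.data(), planned_spans.size()});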
4 changes: 2 additions & 2 deletions runtime/executor/tensor_parser.h
@@ -38,10 +38,10 @@ parseListOptionalType(
     EValue* values_,
     MemoryManager* memory_manager) {
   auto* evalp_list = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
-      memory_manager->get_runtime_allocator(), EValue*, value_indices->size());
+      memory_manager->method_allocator(), EValue*, value_indices->size());
 
   auto* optional_tensor_list = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
-      memory_manager->get_runtime_allocator(),
+      memory_manager->method_allocator(),
       exec_aten::optional<T>,
       value_indices->size());

5 changes: 1 addition & 4 deletions runtime/executor/tensor_parser_aten.cpp
@@ -84,10 +84,7 @@ Result<at::Tensor> parseTensor(
   } else {
     // Now that we know how big the tensor is, find and assign its memory.
     Result<void*> data_ptr = getTensorDataPtr(
-        s_tensor,
-        program,
-        tensor.nbytes(),
-        memory_manager->get_non_constant_allocator());
+        s_tensor, program, tensor.nbytes(), memory_manager->planned_memory());
     if (!data_ptr.ok()) {
       ET_LOG(Error, "getTensorDataPtr() failed: 0x%" PRIx32, data_ptr.error());
       return data_ptr.error();
4 changes: 2 additions & 2 deletions runtime/executor/tensor_parser_exec_aten.cpp
@@ -27,11 +27,11 @@ __ET_NODISCARD Result<BoxedEvalueList<exec_aten::Tensor>> parseTensorList(
   EXECUTORCH_SCOPE_PROF("TensorParser::parseTensorList");
 
   auto* tensor_list = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
-      memory_manager->get_runtime_allocator(),
+      memory_manager->method_allocator(),
       exec_aten::Tensor,
       tensor_indices->size());
   auto* evalp_list = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
-      memory_manager->get_runtime_allocator(), EValue*, tensor_indices->size());
+      memory_manager->method_allocator(), EValue*, tensor_indices->size());
 
   // For each tensor index look up the corresponding Tensor (which has been
   // already allocated) and stick it in the list.