135 changes: 71 additions & 64 deletions runtime/executor/memory_manager.h
@@ -14,88 +14,95 @@
 namespace torch {
 namespace executor {
 
-// The memory manager for the executor. It is responsible for keeping
-// track of the memory allocation across the lifespan of the executor,
-// providing allocators for the following types of objects:
-//
-// 1. Constants - Constant values in the program. TODO(myuan): we may not
-// need it (because the constants are hardcoded in the flatbuffer) but
-// we'll account for it for the time being for completeness.
-//
-// 2. Non-constants - Non-constant values in the program, which may or may not
-// be tied to a memory plan.
-//
-// 3. Runtime structures - Any data needed by the executor itself.
-// TODO(myuan): determine whether Delegates need to receive it in the "init"
-// method for backends to use directly, or whether memory needs will be
-// expressed as an argument to the delegated methods for memory planning to
-// account for. Same concerns about dynamic behaviour apply.
-//
-// 4. Kernel temporary - This is to provide kernels with a way to create memory,
-// without having to request it by adding an extra argument. The extra argument
-// approach is fine if/when planning desires to account for such memory, but in
-// certain cases a kernel may be fine just leaving this as an implementation
-// detail of the kernels itself (but we still want to be able to capture such
-// memory allocation).
-//
-// In general, this memory manager aims to consolidate all dynamic memory needs
-// for program execution. This can allow for heap-less execution (relevant to
-// some embedded scenarios), and overall have a tighter control over memory
-// utilization. The manager, however, cannot ensure all allocation is accounted
-// for since kernel implementations are free to use a separate way to allocate
-// memory (e.g. for things like scratch space).
-// TODO(myuan): analyze the stack data overhead and lifespan.
-
-class MemoryManager {
+/**
+ * A container class for allocators used during Method load and execution.
+ *
+ * This class consolidates all dynamic memory needs for Method load and
+ * execution. This can allow for heap-based as well as heap-less execution
+ * (relevant to some embedded scenarios), and overall provides more control over
+ * memory use.
+ *
+ * This class, however, cannot ensure all allocation is accounted for since
+ * kernel and backend implementations are free to use a separate way to allocate
+ * memory (e.g., for things like scratch space). But we do suggest that backends
+ * and kernels use these provided allocators whenever possible.
+ */
+class MemoryManager final {
  public:
-  MemoryManager(
-      MemoryAllocator* constant_allocator,
-      HierarchicalAllocator* non_constant_allocator,
-      MemoryAllocator* runtime_allocator,
-      MemoryAllocator* kernel_temporary_allocator)
-      : constant_allocator_(constant_allocator),
-        non_constant_allocator_(non_constant_allocator),
-        runtime_allocator_(runtime_allocator),
-        kernel_temporary_allocator_(kernel_temporary_allocator) {}
+  /**
+   * Constructs a new MemoryManager.
+   *
+   * @param[in] method_allocator The allocator to use when loading a Method and
+   *     allocating its internal structures. Must outlive the Method that uses
+   *     it.
+   * @param[in] planned_memory The memory-planned buffers to use for mutable
+   *     tensor data when executing a Method. Must outlive the Method that uses
+   *     it. May be `nullptr` if the Method does not use any memory-planned
+   *     tensor data. The sizes of the buffers in this HierarchicalAllocator
+   *     must agree with the corresponding
+   *     `MethodMeta::num_memory_planned_buffers()` and
+   *     `MethodMeta::memory_planned_buffer_size(N)` values, which are embedded
+   *     in the Program.
+   * @param[in] temp_allocator The allocator to use when allocating temporary
+   *     data during kernel or delegate execution. Must outlive the Method that
+   *     uses it. May be `nullptr` if the Method does not use kernels or
+   *     delegates that allocate temporary data. This allocator will be reset
+   *     after every kernel or delegate call during execution.
+   */
+  explicit MemoryManager(
+      MemoryAllocator* method_allocator,
+      HierarchicalAllocator* planned_memory = nullptr,
+      MemoryAllocator* temp_allocator = nullptr)
+      : method_allocator_(method_allocator),
+        planned_memory_(planned_memory),
+        temp_allocator_(temp_allocator) {}
 
   /**
-   * Returns an allocator for constant values in the program.
+   * DEPRECATED: Use the constructor without `constant_allocator` instead.
+   *
+   * TODO(T162089316): Remove this once all users migrate to the new ctor.
    */
-  const MemoryAllocator* get_constant_allocator() const {
-    return constant_allocator_;
-  }
+  __ET_DEPRECATED MemoryManager(
+      __ET_UNUSED MemoryAllocator* constant_allocator,
+      HierarchicalAllocator* non_constant_allocator,
+      MemoryAllocator* runtime_allocator,
+      MemoryAllocator* kernel_temporary_allocator)
+      : MemoryManager(
+            /*method_allocator=*/runtime_allocator,
+            /*planned_memory=*/non_constant_allocator,
+            /*temp_allocator=*/kernel_temporary_allocator) {}
 
   /**
-   * Returns an hierarchical allocator for non-constant values in the program.
+   * Returns the allocator that the runtime will use to allocate internal
+   * structures while loading a Method. Must not be used after its associated
+   * Method has been loaded.
    */
-  HierarchicalAllocator* get_non_constant_allocator() const {
-    return non_constant_allocator_;
+  MemoryAllocator* method_allocator() const {
+    return method_allocator_;
   }
 
   /**
-   * Returns an allocator to be used for any runtime internal structures
-   * (i.e. not directly program values).
+   * Returns the memory-planned buffers to use for mutable tensor data.
    */
-  MemoryAllocator* get_runtime_allocator() const {
-    return runtime_allocator_;
+  HierarchicalAllocator* planned_memory() const {
+    return planned_memory_;
   }
 
   /**
-   * Returns an allocator that kernel implementations can use to
-   * create temporary memory (i.e. whose lifespan is a single execution
-   * of the kernel).
+   * Returns the allocator to use to allocate temporary data during kernel or
+   * delegate execution.
+   *
+   * This allocator will be reset after every kernel or delegate call during
+   * execution.
    */
-  MemoryAllocator* get_kernel_temporary_allocator() const {
-    return kernel_temporary_allocator_;
+  MemoryAllocator* temp_allocator() const {
+    return temp_allocator_;
  }
 
-  virtual ~MemoryManager() {}
-
  private:
-  const MemoryAllocator* constant_allocator_;
-  HierarchicalAllocator* non_constant_allocator_;
-  MemoryAllocator* runtime_allocator_;
-  MemoryAllocator* kernel_temporary_allocator_;
+  MemoryAllocator* method_allocator_;
+  HierarchicalAllocator* planned_memory_;
+  MemoryAllocator* temp_allocator_;
 };
 
 } // namespace executor
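For illustration, a minimal usage sketch of the new three-allocator API, not taken from this diff; the pool sizes and variable names are illustrative. Calls to the deprecated four-argument constructor map onto this as runtime_allocator -> method_allocator, non_constant_allocator -> planned_memory, and kernel_temporary_allocator -> temp_allocator, with constant_allocator dropped.

#include <executorch/runtime/executor/memory_manager.h>

using namespace torch::executor;

// Statically allocated pools; the sizes here are arbitrary placeholders.
static uint8_t method_pool[4096];
static uint8_t temp_pool[1024];

MemoryAllocator method_allocator(sizeof(method_pool), method_pool);
MemoryAllocator temp_allocator(sizeof(temp_pool), temp_pool);

// planned_memory may be nullptr when the Method has no memory-planned
// tensor data.
MemoryManager memory_manager(
    &method_allocator, /*planned_memory=*/nullptr, &temp_allocator);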
35 changes: 18 additions & 17 deletions runtime/executor/method.cpp
@@ -44,7 +44,8 @@ class BackendDelegate final {
    *
    * @param[in] delegate The serialized backend delegate to load.
    * @param[in] program The serialized program to load from.
-   * @param[in] runtime_allocator Allocator for creating runtime C++ objects.
+   * @param[in] backend_init_context The context pointer to pass to the
+   *     backend's init() method.
    * @param[out] out The BackendDelegate to initialize.
    *
    * @returns Error::Ok if the initialization succeeded, or an error otherwise.
@@ -212,12 +213,12 @@ struct Chain {
 namespace {
 
 Result<InstructionArgs> gen_instruction_arguments(
-    MemoryAllocator* runtime_allocator,
+    MemoryAllocator* method_allocator,
     EValue* values,
     size_t num_args,
     const int32_t* arg_idxs) {
   EValue** arg_list =
-      ET_ALLOCATE_LIST_OR_RETURN_ERROR(runtime_allocator, EValue*, num_args);
+      ET_ALLOCATE_LIST_OR_RETURN_ERROR(method_allocator, EValue*, num_args);
   for (size_t i = 0; i < num_args; ++i) {
     arg_list[i] = &values[arg_idxs[i]];
   }
@@ -267,7 +268,7 @@ Error Method::parse_values() {
   ET_CHECK(flatbuffer_values != nullptr);
   size_t n_value = flatbuffer_values->size();
   values_ = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
-      memory_manager_->get_runtime_allocator(), EValue, n_value);
+      memory_manager_->method_allocator(), EValue, n_value);
 
   // n_value_ counts the number of successfully-initialized values for ~Method()
   // to clean up, and is incremented at the bottom of the loop. This makes it
@@ -299,10 +300,10 @@
       // Allocate space for boxed and unboxed list representations using
       // values_ as source of truth
       auto* evalp_list =
-          memory_manager_->get_runtime_allocator()->allocateList<EValue*>(
+          memory_manager_->method_allocator()->allocateList<EValue*>(
               items->size());
       auto* int_list =
-          memory_manager_->get_runtime_allocator()->allocateList<int64_t>(
+          memory_manager_->method_allocator()->allocateList<int64_t>(
               items->size());
 
       // initialize boxed list
@@ -452,9 +453,9 @@ Error Method::resolve_operator(
   populateOperatorName(op, kTempBufferSizeForName, operator_name);
 
   // resolve tensor meta
-  auto runtime_allocator = memory_manager_->get_runtime_allocator();
+  auto method_allocator = memory_manager_->method_allocator();
   TensorMeta* meta =
-      ET_ALLOCATE_LIST_OR_RETURN_ERROR(runtime_allocator, TensorMeta, n_args);
+      ET_ALLOCATE_LIST_OR_RETURN_ERROR(method_allocator, TensorMeta, n_args);
   size_t count = 0;
   for (size_t i = 0; i < n_args; i++) {
     EValue* eval = args[i];
@@ -463,7 +464,7 @@
       auto tensor = eval->toTensor();
       meta[count].dtype_ = tensor.scalar_type();
       exec_aten::DimOrderType* dim_order_ptr = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
-          runtime_allocator, exec_aten::DimOrderType, tensor.dim());
+          method_allocator, exec_aten::DimOrderType, tensor.dim());
       size_t size = tensor.dim();
       Error err = get_dim_order(tensor, dim_order_ptr, size);
       ET_CHECK_OR_RETURN_ERROR(
@@ -514,7 +515,7 @@ Error Method::init(executorch_flatbuffer::ExecutionPlan* s_plan) {
   init_state_ =
       InitializationState::InitializationFailed; // Until proven otherwise
   serialization_plan_ = s_plan;
-  auto runtime_allocator = memory_manager_->get_runtime_allocator();
+  auto method_allocator = memory_manager_->method_allocator();
 
   {
     // Parse the elements of the values_ array.
@@ -530,7 +531,7 @@
     ET_CHECK(delegates != nullptr);
     size_t n_delegate = delegates->size();
     delegates_ = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
-        runtime_allocator, BackendDelegate, n_delegate);
+        method_allocator, BackendDelegate, n_delegate);
 
     // n_delegate_ counts the number of successfully-initialized delegates for
     // ~Method() to clean up, and is incremented at the bottom of the loop. This
@@ -539,7 +540,7 @@

     for (size_t i = 0; i < n_delegate; ++i) {
       const auto& delegate = *delegates->Get(i);
-      BackendInitContext backend_init_context(runtime_allocator);
+      BackendInitContext backend_init_context(method_allocator);
       Error err = BackendDelegate::Init(
           delegate, program_, backend_init_context, &delegates_[i]);
       if (err != Error::Ok) {
@@ -559,15 +560,15 @@
     n_chains_ = chains->size();
 
     chains_ =
-        ET_ALLOCATE_LIST_OR_RETURN_ERROR(runtime_allocator, Chain, n_chains_);
+        ET_ALLOCATE_LIST_OR_RETURN_ERROR(method_allocator, Chain, n_chains_);
     int32_t num_instructions_missing_op = 0;
     for (size_t i = 0; i < n_chains_; ++i) {
       auto s_chain = chains->Get(i);
       auto num_instructions = s_chain->instructions()->size();
       auto chain_instruction_kernels = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
-          runtime_allocator, OpFunction, num_instructions);
+          method_allocator, OpFunction, num_instructions);
       auto chain_instruction_arg_lists = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
-          runtime_allocator, InstructionArgs, num_instructions);
+          method_allocator, InstructionArgs, num_instructions);
 
       // Set up the argument lists ahead of time and store pointers to them to
       // use when the instructions are called
@@ -579,7 +580,7 @@
           const auto arg_idxs =
               instruction->instr_args_as_KernelCall()->args();
           auto res = gen_instruction_arguments(
-              runtime_allocator, values_, arg_idxs->size(), arg_idxs->data());
+              method_allocator, values_, arg_idxs->size(), arg_idxs->data());
           if (!res.ok()) {
             return res.error();
           }
@@ -600,7 +601,7 @@
           const auto arg_idxs =
               instruction->instr_args_as_DelegateCall()->args();
           auto res = gen_instruction_arguments(
-              runtime_allocator, values_, arg_idxs->size(), arg_idxs->data());
+              method_allocator, values_, arg_idxs->size(), arg_idxs->data());
           if (!res.ok()) {
             return res.error();
           }
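The renames above cover load-time allocation; at execution time, kernels and delegates draw on the temp allocator instead. As a hedged sketch of that pattern (the helper below is hypothetical, not an API in this diff), a caller could take scratch memory from the temp allocator and rely on the runtime resetting it after each kernel or delegate call:

// Hypothetical helper, for illustration only.
Error run_kernel_with_scratch(MemoryManager* memory_manager, size_t nbytes) {
  MemoryAllocator* temp = memory_manager->temp_allocator();
  if (temp == nullptr) {
    return Error::InvalidState; // no temp allocator was provided
  }
  void* scratch = temp->allocate(nbytes);
  if (scratch == nullptr) {
    return Error::MemoryAllocationFailed;
  }
  // ... use `scratch` for the duration of this call only; the runtime resets
  // the temp allocator after every kernel or delegate call, so there is no
  // explicit free.
  return Error::Ok;
}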
4 changes: 2 additions & 2 deletions runtime/executor/method_meta.cpp
@@ -169,14 +169,14 @@ Result<TensorInfo> MethodMeta::output_tensor_meta(size_t index) const {
       static_cast<exec_aten::ScalarType>(tensor_value->scalar_type()));
 }
 
-size_t MethodMeta::num_non_const_buffers() const {
+size_t MethodMeta::num_memory_planned_buffers() const {
   // Index zero is reserved internally, and we hide it from users. The actual
   // number of buffers is one fewer than the actual size of this list in the
   // program.
   return s_plan_->non_const_buffer_sizes()->size() - 1;
 }
 
-Result<int64_t> MethodMeta::non_const_buffer_size(size_t index) const {
+Result<int64_t> MethodMeta::memory_planned_buffer_size(size_t index) const {
   auto num_buffers = this->num_non_const_buffers();
   ET_CHECK_OR_RETURN_ERROR(
       index >= 0 && index < num_buffers,
24 changes: 19 additions & 5 deletions runtime/executor/method_meta.h
@@ -162,19 +162,33 @@
   Result<TensorInfo> output_tensor_meta(size_t index) const;
 
   /**
-   * Get the number of non-constant buffers this method requires.
+   * Get the number of memory-planned buffers this method requires.
    *
-   * @returns The number of non-constant buffers.
+   * @returns The number of memory-planned buffers.
    */
-  size_t num_non_const_buffers() const;
+  size_t num_memory_planned_buffers() const;
 
   /**
-   * Get the size in bytes of the specified non-constant buffer.
+   * Get the size in bytes of the specified memory-planned buffer.
    *
    * @param[in] index The index of the buffer to look up.
    * @returns The size in bytes on success, or an error on failure.
    */
-  Result<int64_t> non_const_buffer_size(size_t index) const;
+  Result<int64_t> memory_planned_buffer_size(size_t index) const;
+
+  /**
+   * DEPRECATED: Use num_memory_planned_buffers() instead.
+   */
+  __ET_DEPRECATED size_t num_non_const_buffers() const {
+    return num_memory_planned_buffers();
+  }
+
+  /**
+   * DEPRECATED: Use memory_planned_buffer_size() instead.
+   */
+  __ET_DEPRECATED Result<int64_t> non_const_buffer_size(size_t index) const {
+    return memory_planned_buffer_size(index);
+  }
 
  private:
   // Let Program create MethodMeta.
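For illustration, a hedged sketch of the intended call pattern, not part of this diff: size the planned buffers from MethodMeta, wrap them in a HierarchicalAllocator, and hand the result to MemoryManager. This assumes a loaded `program` exposing method_meta(), a method named "forward", and a HierarchicalAllocator constructible from a span of buffers; ownership and error handling are abbreviated.

#include <memory>
#include <vector>

using namespace torch::executor;

Result<MethodMeta> meta = program.method_meta("forward");
ET_CHECK(meta.ok());

std::vector<std::unique_ptr<uint8_t[]>> owned_buffers;
std::vector<Span<uint8_t>> planned_spans;
const size_t n = meta->num_memory_planned_buffers();
for (size_t i = 0; i < n; ++i) {
  // Buffer sizes are embedded in the Program's memory plan.
  const size_t nbytes =
      static_cast<size_t>(meta->memory_planned_buffer_size(i).get());
  owned_buffers.push_back(std::make_unique<uint8_t[]>(nbytes));
  planned_spans.push_back({owned_buffers.back().get(), nbytes});
}
HierarchicalAllocator planned_memory(
    {planned_spans.data(), planned_spans.size()});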
4 changes: 2 additions & 2 deletions runtime/executor/tensor_parser.h
@@ -38,10 +38,10 @@ parseListOptionalType(
     EValue* values_,
     MemoryManager* memory_manager) {
   auto* evalp_list = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
-      memory_manager->get_runtime_allocator(), EValue*, value_indices->size());
+      memory_manager->method_allocator(), EValue*, value_indices->size());
 
   auto* optional_tensor_list = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
-      memory_manager->get_runtime_allocator(),
+      memory_manager->method_allocator(),
       exec_aten::optional<T>,
       value_indices->size());

5 changes: 1 addition & 4 deletions runtime/executor/tensor_parser_aten.cpp
@@ -84,10 +84,7 @@ Result<at::Tensor> parseTensor(
   } else {
     // Now that we know how big the tensor is, find and assign its memory.
     Result<void*> data_ptr = getTensorDataPtr(
-        s_tensor,
-        program,
-        tensor.nbytes(),
-        memory_manager->get_non_constant_allocator());
+        s_tensor, program, tensor.nbytes(), memory_manager->planned_memory());
     if (!data_ptr.ok()) {
       ET_LOG(Error, "getTensorDataPtr() failed: 0x%" PRIx32, data_ptr.error());
       return data_ptr.error();
4 changes: 2 additions & 2 deletions runtime/executor/tensor_parser_exec_aten.cpp
@@ -27,11 +27,11 @@ __ET_NODISCARD Result<BoxedEvalueList<exec_aten::Tensor>> parseTensorList(
   EXECUTORCH_SCOPE_PROF("TensorParser::parseTensorList");
 
   auto* tensor_list = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
-      memory_manager->get_runtime_allocator(),
+      memory_manager->method_allocator(),
       exec_aten::Tensor,
       tensor_indices->size());
   auto* evalp_list = ET_ALLOCATE_LIST_OR_RETURN_ERROR(
-      memory_manager->get_runtime_allocator(), EValue*, tensor_indices->size());
+      memory_manager->method_allocator(), EValue*, tensor_indices->size());
 
   // For each tensor index look up the corresponding Tensor (which has been
   // already allocated) and stick it in the list.