-
Notifications
You must be signed in to change notification settings - Fork 623
Introducing NXP Neutron runtime #10563
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Copyright 2024 NXP | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
# Include roots used by the delegate: the directory three levels above this
# one and the portable c10 headers under runtime/core/portable_type.
set(_common_include_directories
    ${CMAKE_CURRENT_SOURCE_DIR}/../../..
    ${CMAKE_CURRENT_SOURCE_DIR}/../../runtime/core/portable_type/c10
)

# Directory-scoped compile definition; applies to the target created below.
add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS)

# Sources that make up the Neutron delegate library.
set(_neutron_sources
    ${CMAKE_CURRENT_SOURCE_DIR}/runtime/NeutronBackend.cpp
)

# Static library with the Neutron backend. The include roots are PUBLIC, so
# targets linking against it inherit them.
add_library(executorch_delegate_neutron STATIC ${_neutron_sources})
target_include_directories(executorch_delegate_neutron
    PUBLIC ${_common_include_directories}
)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,413 @@ | ||
/* | ||
* Copyright 2024 NXP | ||
* | ||
* This source code is licensed under the BSD-style license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
* | ||
* Implementation of the backend for the NXP Neutron NPU. | ||
*/ | ||
|
||
#include <executorch/runtime/backend/interface.h> | ||
#include <executorch/runtime/core/error.h> | ||
#include <executorch/runtime/core/evalue.h> | ||
|
||
#include "NeutronDriver.h" | ||
#include "NeutronErrors.h" | ||
|
||
using namespace std; | ||
|
||
namespace torch { | ||
namespace executor { | ||
digantdesai marked this conversation as resolved.
Show resolved
Hide resolved
|
||
namespace neutron { | ||
|
||
// All memory must be aligned to 16 bytes.
#define BUFFER_ALIGNMENT 16
// Rounds `size` up to the next multiple of BUFFER_ALIGNMENT.
// The argument is parenthesized so that an expression argument (for example
// `a << 2` or `c ? x : y`) is rounded as a whole rather than being split up by
// operator precedence inside the macro.
#define ALIGN_SIZE(size) \
  (((size) + BUFFER_ALIGNMENT - 1) & (~(BUFFER_ALIGNMENT - 1)))
|
||
/* Header schema:
   +----------------------------------+-----------------------------------+
   | Input TensorFormats length (1B)  | Output TensorFormats length (1B)  |
   +----------------------------------+-----------------------------------+
   | 1st input tensor format (1B)     | [nth* input tensor format (1B)]   |
   +----------------------------------+-----------------------------------+
   | 1st output tensor format (1B)    | [nth* output tensor format (1B)]  |
   +----------------------------------+-----------------------------------+

   All macros below take `base`, a byte pointer to the start of the header.
   `base` is parenthesized in every expansion so that a pointer expression
   such as `p + offset` can be passed safely.
*/
#define ITEM_SIZE 1 // 1 Byte
#define INPUT_TENSOR_FORMAT_LEN_POS 0
#define OUTPUT_TENSOR_FORMAT_LEN_POS 1
// Address of the first input tensor format byte (right after the two lengths).
#define INPUT_TENSOR_FORMAT_ARRAY_ADDR(base) ((base) + 2 * ITEM_SIZE)
// Address of the first output tensor format byte (after all input formats).
#define OUTPUT_TENSOR_FORMAT_ARRAY_ADDR(base) \
  ((base) + 2 * ITEM_SIZE + (base)[INPUT_TENSOR_FORMAT_LEN_POS])
// Address of the payload: the header length (two length bytes plus all format
// bytes) rounded up with ALIGN_SIZE and added to `base`.
#define PAYLOAD_ADDR(base)                                   \
  ((base) +                                                  \
   ALIGN_SIZE(                                               \
       2 * ITEM_SIZE + (base)[INPUT_TENSOR_FORMAT_LEN_POS] + \
       (base)[OUTPUT_TENSOR_FORMAT_LEN_POS]))
|
||
// Aggregate neutron model handle and data structures into one. | ||
typedef struct { | ||
int numInputs = 0; | ||
int numOutputs = 0; | ||
uint32_t scratchSize = 0; | ||
NeutronModelConfig mcfg; | ||
NeutronDataConfig dcfg; | ||
NeutronModelHandle nmh = NULL; | ||
const uint8_t* inputTranspositionFlags; | ||
const uint8_t* outputTranspositionFlags; | ||
} NeutronConfig; | ||
|
||
// Applied on outputs: copies `src`, indexed as [N][H][W][C] (channel last),
// into `dest`, indexed as [N][C][H][W] (channel first).
template <typename T>
void transposeToChannelFirst(
    const T* src,
    T* dest,
    size_t N,
    size_t C,
    size_t H,
    size_t W) {
  // The loop nest visits `dest` in storage order, so a running cursor stands
  // in for the explicit destination index.
  T* out = dest;
  for (size_t batch = 0; batch < N; ++batch) {
    for (size_t channel = 0; channel < C; ++channel) {
      for (size_t row = 0; row < H; ++row) {
        for (size_t col = 0; col < W; ++col) {
          *out++ = src[((batch * H + row) * W + col) * C + channel];
        }
      }
    }
  }
}
|
||
// Applied on inputs: copies `src`, indexed as [N][C][H][W] (channel first),
// into `dest`, indexed as [N][H][W][C] (channel last).
template <typename T>
void transposeToChannelLast(
    const T* src,
    T* dest,
    size_t N,
    size_t C,
    size_t H,
    size_t W) {
  // The loop nest visits `src` in storage order, so a running cursor stands in
  // for the explicit source index.
  const T* in = src;
  for (size_t batch = 0; batch < N; ++batch) {
    for (size_t channel = 0; channel < C; ++channel) {
      for (size_t row = 0; row < H; ++row) {
        for (size_t col = 0; col < W; ++col) {
          dest[((batch * H + row) * W + col) * C + channel] = *in++;
        }
      }
    }
  }
}
|
||
// Transpose src buffer in channel first format into dest buffer in channel last | ||
// format, sizes correspond to src dimensions in the Executorch defined tensor | ||
// (which is NCHW), element_size is in Bytes. | ||
void transposeInput( | ||
const void* src, | ||
void* dest, | ||
const ArrayRef<exec_aten::SizesType>& sizes, | ||
size_t element_size) { | ||
size_t length = sizes.size(); | ||
if (length < 3) { | ||
return; | ||
} | ||
size_t N = 1; | ||
size_t C = sizes[length - 3]; | ||
size_t H = sizes[length - 2]; | ||
size_t W = sizes[length - 1]; | ||
for (size_t i = 0; i < length - 3; i++) { | ||
N *= sizes[i]; | ||
} | ||
switch (element_size) { | ||
case 1: | ||
return transposeToChannelLast<uint8_t>( | ||
static_cast<const uint8_t*>(src), | ||
static_cast<uint8_t*>(dest), | ||
N, | ||
C, | ||
H, | ||
W); | ||
case 2: | ||
return transposeToChannelLast<uint16_t>( | ||
static_cast<const uint16_t*>(src), | ||
static_cast<uint16_t*>(dest), | ||
N, | ||
C, | ||
H, | ||
W); | ||
case 4: | ||
return transposeToChannelLast<uint32_t>( | ||
static_cast<const uint32_t*>(src), | ||
static_cast<uint32_t*>(dest), | ||
N, | ||
C, | ||
H, | ||
W); | ||
case 8: | ||
return transposeToChannelLast<uint64_t>( | ||
static_cast<const uint64_t*>(src), | ||
static_cast<uint64_t*>(dest), | ||
N, | ||
C, | ||
H, | ||
W); | ||
} | ||
} | ||
|
||
// Transpose src buffer in channel last format into dest buffer in channel first | ||
// format, sizes correspond to dest dimensions in the Executorch defined tensor | ||
// (which is NCHW), element_size is in Bytes. | ||
void transposeOutput( | ||
const void* src, | ||
void* dest, | ||
const ArrayRef<exec_aten::SizesType>& sizes, | ||
size_t element_size) { | ||
size_t length = sizes.size(); | ||
if (length < 3) { | ||
return; | ||
} | ||
size_t N = 1; | ||
size_t C = sizes[length - 3]; | ||
size_t H = sizes[length - 2]; | ||
size_t W = sizes[length - 1]; | ||
for (size_t i = 0; i < length - 3; i++) { | ||
N *= sizes[i]; | ||
} | ||
switch (element_size) { | ||
case 1: | ||
return transposeToChannelFirst<uint8_t>( | ||
static_cast<const uint8_t*>(src), | ||
static_cast<uint8_t*>(dest), | ||
N, | ||
C, | ||
H, | ||
W); | ||
case 2: | ||
return transposeToChannelFirst<uint16_t>( | ||
static_cast<const uint16_t*>(src), | ||
static_cast<uint16_t*>(dest), | ||
N, | ||
C, | ||
H, | ||
W); | ||
case 4: | ||
return transposeToChannelFirst<uint32_t>( | ||
static_cast<const uint32_t*>(src), | ||
static_cast<uint32_t*>(dest), | ||
N, | ||
C, | ||
H, | ||
W); | ||
case 8: | ||
return transposeToChannelFirst<uint64_t>( | ||
static_cast<const uint64_t*>(src), | ||
static_cast<uint64_t*>(dest), | ||
N, | ||
C, | ||
H, | ||
W); | ||
} | ||
} | ||
|
||
class NeutronBackend final : public PyTorchBackendInterface { | ||
public: | ||
NeutronBackend() {} | ||
|
||
~NeutronBackend() = default; | ||
|
||
virtual bool is_available() const override { | ||
return true; | ||
} | ||
|
||
Result<DelegateHandle*> init( | ||
JakeStevens marked this conversation as resolved.
Show resolved
Hide resolved
|
||
BackendInitContext& context, | ||
FreeableBuffer* processed, | ||
ArrayRef<CompileSpec> compile_specs) const override { | ||
MemoryAllocator* allocator = context.get_runtime_allocator(); | ||
|
||
auto* cfg = allocator->allocateInstance<NeutronConfig>(); | ||
|
||
// The following data is read from the "processed" data blob. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: make this a helper function? |
||
// cfg->numInputs | ||
// cfg->numoutputs | ||
// cfg->mcfg.microcode | ||
// cfg->mcfg.weights | ||
// cfg->mcfg.kernels | ||
const uint8_t* transpositionFlags = | ||
static_cast<const uint8_t*>(processed->data()); | ||
int numInputs = transpositionFlags[INPUT_TENSOR_FORMAT_LEN_POS]; | ||
int numOutputs = transpositionFlags[OUTPUT_TENSOR_FORMAT_LEN_POS]; | ||
cfg->inputTranspositionFlags = | ||
INPUT_TENSOR_FORMAT_ARRAY_ADDR(transpositionFlags); | ||
JakeStevens marked this conversation as resolved.
Show resolved
Hide resolved
|
||
cfg->outputTranspositionFlags = | ||
OUTPUT_TENSOR_FORMAT_ARRAY_ADDR(transpositionFlags); | ||
|
||
const uint32_t* buffer = static_cast<const uint32_t*>( | ||
static_cast<const void*> PAYLOAD_ADDR(transpositionFlags)); | ||
uint32_t magicWord = buffer[0]; | ||
// Check valid microcode. | ||
if (magicWord != 0x64434D6E) { | ||
ET_LOG( | ||
Error, | ||
"Preprocessed buffer does not contain a valid Neutron microcode"); | ||
return Error::InvalidProgram; | ||
} | ||
uint32_t microcodeSize = buffer[6]; | ||
uint32_t weightsSize = buffer[7]; | ||
cfg->scratchSize = buffer[9]; | ||
cfg->numInputs = buffer[11]; | ||
cfg->numOutputs = buffer[12]; | ||
if (cfg->numInputs != numInputs) { | ||
ET_LOG( | ||
Error, | ||
"Preprocessed buffer does not contain a valid number of inputs"); | ||
return Error::InvalidProgram; | ||
} | ||
if (cfg->numOutputs != numOutputs) { | ||
ET_LOG( | ||
Error, | ||
"Preprocessed buffer does not contain a valid number of outputs"); | ||
return Error::InvalidProgram; | ||
} | ||
cfg->mcfg.microcode = | ||
static_cast<const uint8_t*>(static_cast<const void*>(buffer)); | ||
cfg->mcfg.weights = static_cast<const uint8_t*>(cfg->mcfg.microcode) + | ||
ALIGN_SIZE(microcodeSize); | ||
cfg->mcfg.kernels = static_cast<const uint8_t*>(cfg->mcfg.weights) + | ||
ALIGN_SIZE(weightsSize); | ||
|
||
#if (NO_HEAP_USAGE == 0) | ||
// The driver allocates and deallocates place for NeutronModelHandle. | ||
cfg->nmh = NULL; | ||
#else | ||
// Allocate place for NeutronModelHandle. | ||
cfg->nmh = static_cast<NeutronModelHandle>( | ||
allocator->allocate(neutronGetModelContextSize())); | ||
#endif | ||
|
||
// Prepare data for through neutron driver. | ||
NeutronError neutronRC = | ||
neutronModelPrepare((const NeutronModelConfig*)&cfg->mcfg, &cfg->nmh); | ||
if (neutronRC != ENONE) { | ||
ET_LOG( | ||
Error, | ||
"Neutron model preparation failed with error code %ld", | ||
neutronRC); | ||
return Error::InvalidProgram; | ||
} | ||
|
||
return cfg; | ||
} | ||
|
||
Error execute( | ||
BackendExecutionContext& context, | ||
DelegateHandle* input_handle, | ||
EValue** args) const override { | ||
NeutronConfig* cfg = static_cast<NeutronConfig*>(input_handle); | ||
|
||
// Allocate place for input and output pointers. | ||
cfg->dcfg.inputs = static_cast<const void**>( | ||
context.allocate(cfg->numInputs * sizeof(void*))); | ||
cfg->dcfg.outputs = | ||
static_cast<void**>(context.allocate(cfg->numOutputs * sizeof(void*))); | ||
cfg->dcfg.outputs[cfg->numOutputs] = | ||
static_cast<void*>(context.allocate(cfg->scratchSize, 16)); | ||
|
||
// Set inputs and outputs from args. | ||
for (int i = 0; i < cfg->numInputs; i++) { | ||
cfg->dcfg.inputs[i] = args[i]->toTensor().const_data_ptr(); | ||
} | ||
for (int i = 0; i < cfg->numOutputs; i++) { | ||
cfg->dcfg.outputs[i] = | ||
args[cfg->numInputs + i]->toTensor().mutable_data_ptr(); | ||
} | ||
|
||
// Transpose inputs. | ||
for (int i = 0; i < cfg->numInputs; i++) { | ||
JakeStevens marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (cfg->inputTranspositionFlags[i]) { | ||
if (args[i]->toTensor().sizes().size() < 3) { | ||
ET_LOG(Error, "Unable to transpose 1D and 2D input to channel last"); | ||
return Error::InvalidProgram; | ||
} | ||
// Allocate buffer, the allocator is reset after each PTE instruction. | ||
void* buffer = context.allocate(args[i]->toTensor().nbytes(), 16); | ||
transposeInput( | ||
args[i]->toTensor().const_data_ptr(), | ||
buffer, | ||
args[i]->toTensor().sizes(), | ||
args[i]->toTensor().element_size()); | ||
cfg->dcfg.inputs[i] = buffer; | ||
} | ||
} | ||
// Redirect outputs. | ||
for (int i = 0; i < cfg->numOutputs; i++) { | ||
if (cfg->outputTranspositionFlags[i]) { | ||
// Allocate buffer, the allocator is reset after each PTE instruction. | ||
void* buffer = | ||
context.allocate(args[cfg->numInputs + i]->toTensor().nbytes(), 16); | ||
cfg->dcfg.outputs[i] = buffer; | ||
} | ||
} | ||
|
||
#ifdef NEUTRON_PROFILE | ||
// TODO: Use trace from BackendExecutionContext. | ||
NeutronTraceConfig trace_config{.traceConfig = 0}; | ||
neutronSetTrace(cfg->nmh, &trace_config); | ||
#endif | ||
|
||
// Run neutron compute. | ||
NeutronError neutronRC = neutronRunBlocking(cfg->nmh, &cfg->dcfg); | ||
if (neutronRC != ENONE) { | ||
ET_LOG( | ||
Error, | ||
"Neutron model evaluation failed with error code %ld", | ||
neutronRC); | ||
return Error::InvalidProgram; | ||
} | ||
|
||
// Transpose outputs. | ||
for (int i = 0; i < cfg->numOutputs; i++) { | ||
if (cfg->outputTranspositionFlags[i]) { | ||
if (args[cfg->numInputs + i]->toTensor().sizes().size() < 3) { | ||
ET_LOG( | ||
Error, "Unable to transpose 1D and 2D output to channel first"); | ||
return Error::InvalidProgram; | ||
} | ||
transposeOutput( | ||
cfg->dcfg.outputs[i], | ||
args[cfg->numInputs + i]->toTensor().mutable_data_ptr(), | ||
args[cfg->numInputs + i]->toTensor().sizes(), | ||
args[cfg->numInputs + i]->toTensor().element_size()); | ||
} | ||
} | ||
|
||
return Error::Ok; | ||
} | ||
|
||
void destroy(DelegateHandle* handle) const override { | ||
NeutronConfig* cfg = reinterpret_cast<NeutronConfig*>(handle); | ||
|
||
// Unprepare to free resources in neutron driver. | ||
NeutronError neutronRC = neutronModelUnprepare(cfg->nmh); | ||
(void)neutronRC; | ||
|
||
// Deallocation is done automatically. | ||
/* | ||
delete[] cfg->dcfg.inputs; | ||
delete[] cfg->dcfg.outputs; | ||
delete cfg; | ||
*/ | ||
return; | ||
} | ||
}; | ||
|
||
// File-local registration of the backend. The three definitions below are
// initialized in declaration order: the backend object is created, wrapped in
// a Backend record under the name "NeutronBackend", and that record is passed
// to register_backend().
namespace { | ||
auto backend = NeutronBackend(); | ||
Backend backend_id{"NeutronBackend", &backend}; | ||
// The value returned by register_backend() is stored but not read in this file.
static auto registered = register_backend(backend_id); | ||
} // namespace
|
||
} // namespace neutron | ||
} // namespace executor | ||
} // namespace torch |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,252 @@ | ||
/* | ||
* Copyright 2022-2024 NXP | ||
* | ||
* SPDX-License-Identifier: BSD-3-Clause | ||
* | ||
* Interface for the NXP Neutron NPU driver. | ||
*/ | ||
|
||
#ifndef NEUTRON_DRIVER_H | ||
#define NEUTRON_DRIVER_H | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
#include <stdbool.h> | ||
#include <stddef.h> | ||
#include <stdint.h> | ||
|
||
#include "NeutronErrors.h" | ||
|
||
/* Neutron Driver error category codes.
 * The enumerators carry no explicit values, so they are numbered
 * consecutively from 0 in declaration order. */ | ||
typedef enum ERROR_CATEGORY_DRIVER { | ||
ERROR_CATEGORY_DRIVER_GENERIC, /* Generic error category */ | ||
ERROR_CATEGORY_DRIVER_UNSUPPORTED, /* Unsupported function */ | ||
ERROR_CATEGORY_DRIVER_UCODE, /* Microcode has a bad magic or an incompatible version */ | ||
ERROR_CATEGORY_DRIVER_INVALID, /* Invalid arguments */ | ||
ERROR_CATEGORY_DRIVER_BAD_HANDLE, /* Bad inference handle */ | ||
ERROR_CATEGORY_DRIVER_NO_MEMORY, /* Not enough memory */ | ||
ERROR_CATEGORY_DRIVER_INTERNAL_FAULT, /* Internal error */ | ||
ERROR_CATEGORY_DRIVER_UNKNOWN_ARCH, /* Unknown architecture */ | ||
ERROR_CATEGORY_DRIVER_TRACE_NOT_RUN, /* Tracing did not run, but a trace buffer was requested */ | ||
ERROR_CATEGORY_DRIVER_TIMEOUT /* Timeout error. */ | ||
} ERROR_CATEGORY_DRIVER; | ||
|
||
/// Trace configuration flag that enables kernel level tracing (bit 0).
#define TRACE_CONFIG_KERNEL_LEVEL (1U << 0) | ||
|
||
/// Trace configuration flag that enables job level tracing (bit 1).
#define TRACE_CONFIG_JOB_LEVEL (1U << 1) | ||
|
||
// Macro to define where to allocate memory for NeutronCtx.
// Defaults to 0 when the build does not define it.
#ifndef NO_HEAP_USAGE | ||
#define NO_HEAP_USAGE 0 | ||
#endif | ||
|
||
/* Neutron Driver errors: wrappers around GEN_NEUTRON_ERROR that pass
 * ERROR_COMPONENT_DRIVER as the component. The generic variant uses the
 * source line number (__LINE__) as the error code. */ | ||
#define GEN_NEUTRON_DRIVER_ERROR(category, code) \ | ||
GEN_NEUTRON_ERROR(ERROR_COMPONENT_DRIVER, category, code) | ||
#define GEN_NEUTRON_DRIVER_GENERIC_ERROR() \ | ||
GEN_NEUTRON_DRIVER_ERROR(ERROR_CATEGORY_DRIVER_GENERIC, __LINE__) | ||
|
||
/// Type definition for a Neutron model handle: an opaque pointer used to
/// uniquely identify a model. By convention the value NEUTRON_INVALID_HANDLE
/// denotes an invalid handle.
typedef void* NeutronModelHandle; | ||
|
||
/// Model description passed to neutronModelPrepare(): the three buffers
/// generated by the Neutron converter tool plus an execution timeout.
typedef struct { | ||
/// Neutron microcode buffer address.
/// The Neutron microcode is generated by the Neutron converter tool.
/// The microcode buffer, 16 bytes aligned, is allocated and initialized by
/// the application or ML framework. The microcode buffer is passed by
/// reference to the Neutron firmware. The microcode buffer is specific for a
/// given ML model.
const void* microcode; | ||
|
||
/// Neutron weights buffer address.
/// The Neutron weights are generated by the Neutron converter tool.
/// The weights buffer, 16 bytes aligned, is allocated and initialized by the
/// application or ML framework. The weights buffer address is passed by
/// reference to the Neutron firmware. The weights buffer is specific for a
/// given ML model.
const void* weights; | ||
|
||
/// Neutron kernels buffer address.
/// The Neutron kernels are generated by the Neutron converter tool.
/// The kernels buffer, 16 bytes aligned, is allocated and initialized by the
/// application or ML framework. The kernels buffer address is passed by
/// reference to the Neutron firmware. The kernels buffer is specific for a
/// given ML model.
const void* kernels; | ||
|
||
/// Timeout, in seconds, for running the microcode.
/// This is the upper limit the user expects a run to take to complete,
/// default 60.
uint32_t timeoutSeconds; | ||
|
||
} NeutronModelConfig; | ||
|
||
/// Per-run data buffers, passed to the execution functions declared in this
/// header (neutronRunBlocking(), neutronRunNonBlocking(), neutronWait() and
/// neutronCustomExec()).
typedef struct { | ||
/// The input buffers of the model.
/// The input buffers are allocated and initialized by the application or ML
/// framework. The input buffers are passed by reference to the Neutron
/// firmware.
const void** inputs; | ||
|
||
/// The output buffers of the model.
/// The output buffers are allocated by the application or ML framework.
/// The output buffers are passed by reference to the Neutron firmware.
void** outputs; | ||
|
||
/// Scratch buffer required for computing model intermediate results.
/// If NULL, this buffer has to be allocated by the driver.
void* scratch; | ||
|
||
/// Scratch buffer required for prefetching model weights from FLASH to SRAM.
/// This buffer is used only for Neutron-C targets when the weight prefetch
/// option was explicitly used. If NULL, this buffer has to be allocated by
/// the driver.
void* scratchWeights; | ||
|
||
} NeutronDataConfig; | ||
|
||
/// Tracing setup passed to neutronSetTrace().
typedef struct { | ||
/// Sets whether tracing should be executed during firmware run or not.
/// If set to 0, tracing will not run.
/// If set to 1 - kernel level tracing (TRACE_CONFIG_KERNEL_LEVEL).
/// If set to 2 - job level tracing (TRACE_CONFIG_JOB_LEVEL).
/// If set to 3 - mixed level tracing (both flags set).
uint32_t traceConfig; | ||
|
||
/// Buffer to store collected trace data.
/// If it is NULL, the driver will allocate the memory; otherwise the
/// application provides it.
char* traceBuffer; | ||
|
||
/// Size of the memory allocated for the buffer. Needed to check whether
/// appending a string would go out of bounds. The application should set
/// this if it allocated the buffer; otherwise the driver will set the value.
size_t traceBufferSize; | ||
} NeutronTraceConfig; | ||
|
||
/// This structure contains the prototypes for functions that have a custom
/// implementation; it is passed to neutronSetConfig(). Any new functions or
/// variables must be added at the end.
typedef struct { | ||
/// This function performs the copying from FLASH to SRAM.
void (*copy)(void* dst, void* src, uint32_t size, uint32_t channel); | ||
/// This is a blocking function that checks if the current copy has finished.
void (*wait)(uint32_t channel); | ||
} NeutronConfig; | ||
|
||
/* Invalid handle, returned by neutronModelPrepare() if an error occurred. */ | ||
#define NEUTRON_INVALID_HANDLE NULL | ||
|
||
/// - Initialize the Neutron Driver library, setting initial values, do memory | ||
/// allocation | ||
/// for internal data structures, do memory mapping. | ||
NeutronError neutronInit(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can these functions be in a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is called in surrounding rtos code, much like ethosu_init |
||
|
||
/// - Deinitialize the Neutron Driver library, releasing any resources aquired | ||
/// by neutronInit | ||
NeutronError neutronDeinit(); | ||
|
||
/// - Prepare Neutron execution for a model with custom firmware. | ||
/// - This function is only available for Neutron-S. | ||
NeutronError neutronCustomPrepare( | ||
uint32_t* inputSize, | ||
int32_t numInputs, | ||
uint32_t* outputSize, | ||
int32_t numOutputs, | ||
const void* firmware, | ||
size_t firmwareSize, | ||
NeutronModelHandle* hdl); | ||
|
||
/// - Run Neutron custom firmware and get the results. | ||
/// - This function is only available for Neutron-S. | ||
NeutronError neutronCustomExec( | ||
NeutronModelHandle hdl, | ||
const NeutronDataConfig* neutron_dcfg); | ||
|
||
/// - Prepare Neutron execution for a model with the given configuration. | ||
/// - This function only prepares the execution by transferring the parameters | ||
/// to the firmware. | ||
/// - This function allows caching a model and then running the same model but | ||
/// with different | ||
/// input data (assuming the new input data replaces the old input data by | ||
/// reusing the same buffers). | ||
/// - In case external allocated memory shall be used for the ModelHandle, e.g. | ||
/// from the Tensorflow | ||
/// tensor arena, hdl shall be a pointer to the start of the allocated memory | ||
/// block. | ||
// If a pointer to NULL is passed, memory will be allocated by the driver | ||
/// from HEAP. If no HEAP is available, an error will be thrown. | ||
NeutronError neutronModelPrepare( | ||
const NeutronModelConfig* mcfg, | ||
NeutronModelHandle* hdl); | ||
|
||
/// - Unprepare Neutron execution handle. | ||
/// - This function releases the internal context data structures and the | ||
/// reserved handle. | ||
NeutronError neutronModelUnprepare(NeutronModelHandle hdl); | ||
|
||
/// - Perform Neutron execution in blocking mode. | ||
NeutronError neutronRunBlocking( | ||
NeutronModelHandle hdl, | ||
const NeutronDataConfig* dcfg); | ||
|
||
/// - Perform Neutron execution in non-blocking mode. | ||
/// - This functionality is only available for Neutron-S. | ||
NeutronError neutronRunNonBlocking( | ||
NeutronModelHandle hdl, | ||
const NeutronDataConfig* dcfg); | ||
|
||
/// - Wait (block) for Neutron completion. | ||
/// - This functionality is only available for Neutron-S. | ||
NeutronError neutronWait(NeutronModelHandle hdl, const NeutronDataConfig* dcfg); | ||
|
||
/// - Query if the job is done by Neutron. | ||
/// - This functionality is only available for neutronRunNonBlocking. | ||
NeutronError neutronIsReady(NeutronModelHandle hdl, bool* isReady); | ||
|
||
#ifndef NDEBUG | ||
/// - Set tracing information. | ||
void neutronSetTrace(NeutronModelHandle hdl, NeutronTraceConfig* tcfg); | ||
|
||
/// - Get tracing result to buffer. | ||
NeutronError | ||
neutronGetTrace(NeutronModelHandle hdl, char** buffer, size_t* size); | ||
#endif | ||
|
||
/// - Perform power management to suspend Neutron hardware. | ||
// - This function disables the clock for Neutron. | ||
NeutronError neutronSuspend(); | ||
|
||
/// - Perform power management to resume Neutron hardware. | ||
// - This function enables the clock for Neutron. | ||
NeutronError neutronResume(); | ||
|
||
/// - Used to initialize custom API's or variables implemented by external | ||
/// application. | ||
NeutronError neutronSetConfig(NeutronConfig* config); | ||
|
||
/// - Used to get NeutronContext size. | ||
size_t neutronGetModelContextSize(); | ||
|
||
/// - Allocates size bytes and returns a pointer to the allocated memory. | ||
/// The returned pointer address will be a multiple of the alignment. | ||
/// Returns NULL on failure. | ||
/// - alignment: Set to 0 if unsure of alignment requirements. | ||
/// - This function is only available for Neutron-S in the Linux environment. | ||
void* neutronMemAlloc(size_t alignment, size_t size); | ||
|
||
/// - Frees the memory buffer pointed to by ptr. | ||
/// - This function is only available for Neutron-S in the Linux environment. | ||
void neutronMemFree(void* ptr); | ||
|
||
/// Other functions to control the state of driver/firmware. | ||
#ifdef __cplusplus | ||
} | ||
#endif | ||
#endif // NEUTRON_DRIVER_H |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
/* | ||
* Copyright 2022-2024 NXP | ||
* | ||
* SPDX-License-Identifier: BSD-3-Clause | ||
* | ||
* Definition of the NXP Neutron NPU driver errors. | ||
*/ | ||
|
||
#ifndef NEUTRON_ERRORS_H | ||
#define NEUTRON_ERRORS_H | ||
|
||
#include <stdint.h> | ||
|
||
/* Error code type used by the whole Neutron API. ENONE (0) means success. */
typedef int32_t NeutronError;

/*
    Generate error code.
    A code is composed of (from least to most significant bit):
        3 bits = component id
        5 bits = category id
        23 bits = code
        1 bit = sign

    The masks match that layout (0x7 for 3 bits, 0x1F for 5 bits), so a
    component value can never spill into the category field and all 32
    category values survive a round trip through GET_ERROR_CATEGORY.
    Every macro argument is parenthesized so expressions can be passed.
*/
#define GEN_NEUTRON_ERROR(component, category, code)   \
  ((NeutronError)((((component) & 0x7) << 0) |         \
                  (((category) & 0x1F) << 3) |         \
                  (((code) & 0x7FFFFF) << 8)))

#define ENONE 0

/* Field extractors, the inverse of GEN_NEUTRON_ERROR. */
#define GET_ERROR_COMPONENT(e) (((e) >> 0) & 0x00000007)
#define GET_ERROR_CATEGORY(e) (((e) >> 3) & 0x0000001F)
#define GET_ERROR_CODE(e) (((e) >> 8) & 0x007FFFFF)
|
||
/* Component ids: the value stored in the lowest 3 bits of a NeutronError and
 * read back by GET_ERROR_COMPONENT. */ | ||
// DO NOT USE 0x0 as component magic number!
typedef enum ERROR_COMPONENT_ID { | ||
ERROR_COMPONENT_LIBRARY = 0x1, | ||
ERROR_COMPONENT_FIRMWARE = 0x2, | ||
ERROR_COMPONENT_DRIVER = 0x3 | ||
} ERROR_COMPONENT_ID; | ||
|
||
/// Retrieve the component name as a string from a NeutronError code.
char* getNeutronErrorComponent(NeutronError ne); | ||
|
||
/// Retrieve the category as a string from a NeutronError code.
char* getNeutronErrorCategory(NeutronError ne); | ||
|
||
#endif // NEUTRON_ERRORS_H |
Uh oh!
There was an error while loading. Please reload this page.