Skip to content

Introducing NXP Neutron runtime #10563

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .github/workflows/trunk.yml
Original file line number Diff line number Diff line change
@@ -271,6 +271,36 @@ jobs:
exit 1
fi

nxp-build-test:
  name: nxp-build-test
  uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
  permissions:
    id-token: write
    contents: read
  with:
    runner: linux.2xlarge
    docker-image: executorch-ubuntu-22.04-arm-sdk
    submodules: 'recursive'
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: 90
    script: |
      # The generic Linux job chooses to use base env, not the one setup by the image
      CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
      conda activate "${CONDA_ENV}"

      # Build the Neutron backend delegate library.
      cmake -DEXECUTORCH_BUILD_NXP_NEUTRON=ON -Bcmake-out .
      cmake --build cmake-out --target executorch_delegate_neutron --config Release

      # Build check for the neutron backend library.
      # Quote the variable so the test does not break on word splitting.
      lib_neutron="cmake-out/backends/nxp/libexecutorch_delegate_neutron.a"
      if [ -f "${lib_neutron}" ]; then
        echo "Neutron backend library built."
      else
        echo "Neutron backend library not found!"
        exit 1
      fi

test-coreml-delegate:
name: test-coreml-delegate
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -485,6 +485,10 @@ if(EXECUTORCH_BUILD_CADENCE)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/cadence)
endif()

# NXP Neutron NPU backend delegate (see backends/nxp).
if(EXECUTORCH_BUILD_NXP_NEUTRON)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/nxp)
endif()

if(EXECUTORCH_BUILD_COREML)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/apple/coreml)
endif()
18 changes: 18 additions & 0 deletions backends/nxp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright 2024 NXP
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Common include paths: the repository root and the bundled c10 headers.
set(
_common_include_directories
${CMAKE_CURRENT_SOURCE_DIR}/../../..
${CMAKE_CURRENT_SOURCE_DIR}/../../runtime/core/portable_type/c10
)
# Use the ExecuTorch-generated c10 macros instead of the upstream ones.
add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS)

set(_neutron_sources ${CMAKE_CURRENT_SOURCE_DIR}/runtime/NeutronBackend.cpp )

# Static library with the Neutron backend runtime (registered at load time).
add_library(executorch_delegate_neutron STATIC ${_neutron_sources})
target_include_directories(
executorch_delegate_neutron PUBLIC ${_common_include_directories}
)
413 changes: 413 additions & 0 deletions backends/nxp/runtime/NeutronBackend.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,413 @@
/*
* Copyright 2024 NXP
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*
* Implementation of the backend for the NXP Neutron NPU.
*/

#include <executorch/runtime/backend/interface.h>
#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/evalue.h>

#include "NeutronDriver.h"
#include "NeutronErrors.h"

using namespace std;

namespace torch {
namespace executor {
namespace neutron {

// All the memory passed to the Neutron driver needs to be aligned to 16 bytes.
#define BUFFER_ALIGNMENT 16
#define ALIGN_SIZE(size) \
  ((size + BUFFER_ALIGNMENT - 1) & (~(BUFFER_ALIGNMENT - 1)))

/* Header schema:
  +----------------------------------+-----------------------------------+
  | Input TensorFormats length (1B)  | Output TensorFormats length (1B)  |
  +----------------------------------+-----------------------------------+
  | 1st input tensor format (1B)     | [nth* input tensor format (1B)]   |
  +----------------------------------+-----------------------------------+
  | 1st output tensor format (1B)    | [nth* output tensor format (1B)]  |
  +----------------------------------+-----------------------------------+
*/
#define ITEM_SIZE 1 // 1 Byte
#define INPUT_TENSOR_FORMAT_LEN_POS 0
#define OUTPUT_TENSOR_FORMAT_LEN_POS 1
// Start of the per-input format flags, right after the two length bytes.
#define INPUT_TENSOR_FORMAT_ARRAY_ADDR(base) (base + 2 * ITEM_SIZE)
// Start of the per-output format flags, right after the input flags.
#define OUTPUT_TENSOR_FORMAT_ARRAY_ADDR(base) \
  (base + 2 * ITEM_SIZE + base[INPUT_TENSOR_FORMAT_LEN_POS])
// The Neutron payload follows the header, padded to BUFFER_ALIGNMENT.
#define PAYLOAD_ADDR(base) \
  (base + \
   ALIGN_SIZE( \
       2 * ITEM_SIZE + base[INPUT_TENSOR_FORMAT_LEN_POS] + \
       base[OUTPUT_TENSOR_FORMAT_LEN_POS]))

// Aggregate neutron model handle and data structures into one.
// NOTE(review): this name shadows the driver-level `NeutronConfig` declared in
// NeutronDriver.h within this namespace — consider renaming to avoid
// confusion with `neutronSetConfig()`'s parameter type.
typedef struct {
  int numInputs = 0;
  int numOutputs = 0;
  // Scratch buffer size read from the microcode header.
  uint32_t scratchSize = 0;
  NeutronModelConfig mcfg;
  NeutronDataConfig dcfg;
  NeutronModelHandle nmh = NULL;
  // Per-tensor flags (one byte each): non-zero means the tensor must be
  // transposed between channel-first and channel-last at execution time.
  const uint8_t* inputTranspositionFlags;
  const uint8_t* outputTranspositionFlags;
} NeutronConfig;

// Applied on outputs.
// Transposes a channel-last (NHWC) `src` buffer into a channel-first (NCHW)
// `dest` buffer. N, C, H and W are the logical NCHW dimensions; `src` and
// `dest` must not overlap.
// (The scraped review-comment text that was embedded in the parameter list
// has been removed; it made the function uncompilable.)
template <typename T>
void transposeToChannelFirst(
    const T* src,
    T* dest,
    size_t N,
    size_t C,
    size_t H,
    size_t W) {
  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < C; c++) {
      for (size_t h = 0; h < H; h++) {
        for (size_t w = 0; w < W; w++) {
          dest[n * C * H * W + c * H * W + h * W + w] =
              src[n * H * W * C + h * W * C + w * C + c];
        }
      }
    }
  }
}

// Applied on inputs.
// Transposes a channel-first (NCHW) `src` buffer into a channel-last (NHWC)
// `dest` buffer. N, C, H and W are the logical NCHW dimensions; `src` and
// `dest` must not overlap.
// (The scraped review-comment text that was embedded in the parameter list
// has been removed; it made the function uncompilable.)
template <typename T>
void transposeToChannelLast(
    const T* src,
    T* dest,
    size_t N,
    size_t C,
    size_t H,
    size_t W) {
  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < C; c++) {
      for (size_t h = 0; h < H; h++) {
        for (size_t w = 0; w < W; w++) {
          dest[n * H * W * C + h * W * C + w * C + c] =
              src[n * C * H * W + c * H * W + h * W + w];
        }
      }
    }
  }
}

// Transpose src buffer in channel first format into dest buffer in channel
// last format. `sizes` are the src dimensions of the ExecuTorch tensor
// (NCHW order), `element_size` is the element width in bytes.
void transposeInput(
    const void* src,
    void* dest,
    const ArrayRef<exec_aten::SizesType>& sizes,
    size_t element_size) {
  const size_t rank = sizes.size();
  // Tensors with fewer than 3 dimensions have no channel axis to move.
  if (rank < 3) {
    return;
  }
  const size_t C = sizes[rank - 3];
  const size_t H = sizes[rank - 2];
  const size_t W = sizes[rank - 1];
  // Fold all leading dimensions into a single batch dimension.
  size_t N = 1;
  for (size_t i = 0; i + 3 < rank; i++) {
    N *= sizes[i];
  }
  // Dispatch on the element width; any other width is a silent no-op.
  if (element_size == 1) {
    transposeToChannelLast<uint8_t>(
        static_cast<const uint8_t*>(src),
        static_cast<uint8_t*>(dest),
        N,
        C,
        H,
        W);
  } else if (element_size == 2) {
    transposeToChannelLast<uint16_t>(
        static_cast<const uint16_t*>(src),
        static_cast<uint16_t*>(dest),
        N,
        C,
        H,
        W);
  } else if (element_size == 4) {
    transposeToChannelLast<uint32_t>(
        static_cast<const uint32_t*>(src),
        static_cast<uint32_t*>(dest),
        N,
        C,
        H,
        W);
  } else if (element_size == 8) {
    transposeToChannelLast<uint64_t>(
        static_cast<const uint64_t*>(src),
        static_cast<uint64_t*>(dest),
        N,
        C,
        H,
        W);
  }
}

// Transpose src buffer in channel last format into dest buffer in channel
// first format. `sizes` are the dest dimensions of the ExecuTorch tensor
// (NCHW order), `element_size` is the element width in bytes.
void transposeOutput(
    const void* src,
    void* dest,
    const ArrayRef<exec_aten::SizesType>& sizes,
    size_t element_size) {
  const size_t rank = sizes.size();
  // Tensors with fewer than 3 dimensions have no channel axis to move.
  if (rank < 3) {
    return;
  }
  const size_t C = sizes[rank - 3];
  const size_t H = sizes[rank - 2];
  const size_t W = sizes[rank - 1];
  // Fold all leading dimensions into a single batch dimension.
  size_t N = 1;
  for (size_t i = 0; i + 3 < rank; i++) {
    N *= sizes[i];
  }
  // Dispatch on the element width; any other width is a silent no-op.
  if (element_size == 1) {
    transposeToChannelFirst<uint8_t>(
        static_cast<const uint8_t*>(src),
        static_cast<uint8_t*>(dest),
        N,
        C,
        H,
        W);
  } else if (element_size == 2) {
    transposeToChannelFirst<uint16_t>(
        static_cast<const uint16_t*>(src),
        static_cast<uint16_t*>(dest),
        N,
        C,
        H,
        W);
  } else if (element_size == 4) {
    transposeToChannelFirst<uint32_t>(
        static_cast<const uint32_t*>(src),
        static_cast<uint32_t*>(dest),
        N,
        C,
        H,
        W);
  } else if (element_size == 8) {
    transposeToChannelFirst<uint64_t>(
        static_cast<const uint64_t*>(src),
        static_cast<uint64_t*>(dest),
        N,
        C,
        H,
        W);
  }
}

class NeutronBackend final : public PyTorchBackendInterface {
 public:
  NeutronBackend() {}

  ~NeutronBackend() = default;

  virtual bool is_available() const override {
    return true;
  }

  /// Parse the `processed` blob (transposition-flag header followed by the
  /// 16-byte-aligned Neutron payload), validate the embedded microcode and
  /// prepare a model handle with the Neutron driver.
  /// Returns a NeutronConfig allocated from the runtime allocator; it lives
  /// until destroy() is called.
  Result<DelegateHandle*> init(
      BackendInitContext& context,
      FreeableBuffer* processed,
      ArrayRef<CompileSpec> compile_specs) const override {
    MemoryAllocator* allocator = context.get_runtime_allocator();

    auto* cfg = allocator->allocateInstance<NeutronConfig>();

    // The following data is read from the "processed" data blob:
    //   cfg->numInputs, cfg->numOutputs,
    //   cfg->mcfg.microcode, cfg->mcfg.weights, cfg->mcfg.kernels
    const uint8_t* transpositionFlags =
        static_cast<const uint8_t*>(processed->data());
    int numInputs = transpositionFlags[INPUT_TENSOR_FORMAT_LEN_POS];
    int numOutputs = transpositionFlags[OUTPUT_TENSOR_FORMAT_LEN_POS];
    cfg->inputTranspositionFlags =
        INPUT_TENSOR_FORMAT_ARRAY_ADDR(transpositionFlags);
    cfg->outputTranspositionFlags =
        OUTPUT_TENSOR_FORMAT_ARRAY_ADDR(transpositionFlags);

    const uint32_t* buffer = static_cast<const uint32_t*>(
        static_cast<const void*> PAYLOAD_ADDR(transpositionFlags));
    uint32_t magicWord = buffer[0];
    // Check valid microcode ("nMCd" magic word, little-endian).
    if (magicWord != 0x64434D6E) {
      ET_LOG(
          Error,
          "Preprocessed buffer does not contain a valid Neutron microcode");
      return Error::InvalidProgram;
    }
    // Fixed 32-bit word offsets into the microcode header.
    uint32_t microcodeSize = buffer[6];
    uint32_t weightsSize = buffer[7];
    cfg->scratchSize = buffer[9];
    cfg->numInputs = buffer[11];
    cfg->numOutputs = buffer[12];
    // Cross-check the microcode header against the transposition-flag header.
    if (cfg->numInputs != numInputs) {
      ET_LOG(
          Error,
          "Preprocessed buffer does not contain a valid number of inputs");
      return Error::InvalidProgram;
    }
    if (cfg->numOutputs != numOutputs) {
      ET_LOG(
          Error,
          "Preprocessed buffer does not contain a valid number of outputs");
      return Error::InvalidProgram;
    }
    // Microcode, weights and kernels are laid out back to back, each section
    // padded to BUFFER_ALIGNMENT.
    cfg->mcfg.microcode =
        static_cast<const uint8_t*>(static_cast<const void*>(buffer));
    cfg->mcfg.weights = static_cast<const uint8_t*>(cfg->mcfg.microcode) +
        ALIGN_SIZE(microcodeSize);
    cfg->mcfg.kernels = static_cast<const uint8_t*>(cfg->mcfg.weights) +
        ALIGN_SIZE(weightsSize);

#if (NO_HEAP_USAGE == 0)
    // The driver allocates and deallocates place for NeutronModelHandle.
    cfg->nmh = NULL;
#else
    // Allocate place for NeutronModelHandle from the runtime allocator.
    cfg->nmh = static_cast<NeutronModelHandle>(
        allocator->allocate(neutronGetModelContextSize()));
#endif

    // Prepare data for the neutron driver.
    NeutronError neutronRC =
        neutronModelPrepare((const NeutronModelConfig*)&cfg->mcfg, &cfg->nmh);
    if (neutronRC != ENONE) {
      // NeutronError is int32_t; widen explicitly to match %ld.
      ET_LOG(
          Error,
          "Neutron model preparation failed with error code %ld",
          static_cast<long>(neutronRC));
      return Error::InvalidProgram;
    }

    return cfg;
  }

  /// Bind input/output tensors from `args`, apply the channel-order
  /// transpositions requested by the blob header, run the model on the NPU
  /// and transpose flagged outputs back to channel-first.
  Error execute(
      BackendExecutionContext& context,
      DelegateHandle* input_handle,
      EValue** args) const override {
    NeutronConfig* cfg = static_cast<NeutronConfig*>(input_handle);

    // Allocate place for input and output pointers. One extra slot after the
    // outputs carries the scratch buffer, so numOutputs + 1 entries are
    // needed (the previous code allocated only numOutputs and wrote one
    // element out of bounds).
    cfg->dcfg.inputs = static_cast<const void**>(
        context.allocate(cfg->numInputs * sizeof(void*)));
    cfg->dcfg.outputs = static_cast<void**>(
        context.allocate((cfg->numOutputs + 1) * sizeof(void*)));
    // NOTE(review): the scratch buffer is conveyed in the slot after the last
    // output — confirm whether dcfg.scratch should be set instead (see
    // NeutronDataConfig in NeutronDriver.h).
    cfg->dcfg.outputs[cfg->numOutputs] =
        static_cast<void*>(context.allocate(cfg->scratchSize, 16));

    // Set inputs and outputs from args.
    for (int i = 0; i < cfg->numInputs; i++) {
      cfg->dcfg.inputs[i] = args[i]->toTensor().const_data_ptr();
    }
    for (int i = 0; i < cfg->numOutputs; i++) {
      cfg->dcfg.outputs[i] =
          args[cfg->numInputs + i]->toTensor().mutable_data_ptr();
    }

    // Transpose flagged inputs into temporary channel-last buffers.
    for (int i = 0; i < cfg->numInputs; i++) {
      if (cfg->inputTranspositionFlags[i]) {
        if (args[i]->toTensor().sizes().size() < 3) {
          ET_LOG(Error, "Unable to transpose 1D and 2D input to channel last");
          return Error::InvalidProgram;
        }
        // Allocate buffer, the allocator is reset after each PTE instruction.
        void* buffer = context.allocate(args[i]->toTensor().nbytes(), 16);
        transposeInput(
            args[i]->toTensor().const_data_ptr(),
            buffer,
            args[i]->toTensor().sizes(),
            args[i]->toTensor().element_size());
        cfg->dcfg.inputs[i] = buffer;
      }
    }
    // Redirect flagged outputs into temporary buffers so they can be
    // transposed back after the run.
    for (int i = 0; i < cfg->numOutputs; i++) {
      if (cfg->outputTranspositionFlags[i]) {
        // Allocate buffer, the allocator is reset after each PTE instruction.
        void* buffer =
            context.allocate(args[cfg->numInputs + i]->toTensor().nbytes(), 16);
        cfg->dcfg.outputs[i] = buffer;
      }
    }

#ifdef NEUTRON_PROFILE
    // TODO: Use trace from BackendExecutionContext.
    NeutronTraceConfig trace_config{.traceConfig = 0};
    neutronSetTrace(cfg->nmh, &trace_config);
#endif

    // Run neutron compute.
    NeutronError neutronRC = neutronRunBlocking(cfg->nmh, &cfg->dcfg);
    if (neutronRC != ENONE) {
      // NeutronError is int32_t; widen explicitly to match %ld.
      ET_LOG(
          Error,
          "Neutron model evaluation failed with error code %ld",
          static_cast<long>(neutronRC));
      return Error::InvalidProgram;
    }

    // Transpose flagged outputs back to channel-first.
    for (int i = 0; i < cfg->numOutputs; i++) {
      if (cfg->outputTranspositionFlags[i]) {
        if (args[cfg->numInputs + i]->toTensor().sizes().size() < 3) {
          ET_LOG(
              Error, "Unable to transpose 1D and 2D output to channel first");
          return Error::InvalidProgram;
        }
        transposeOutput(
            cfg->dcfg.outputs[i],
            args[cfg->numInputs + i]->toTensor().mutable_data_ptr(),
            args[cfg->numInputs + i]->toTensor().sizes(),
            args[cfg->numInputs + i]->toTensor().element_size());
      }
    }

    return Error::Ok;
  }

  /// Release the driver-side model context. Memory obtained from the runtime
  /// allocators is reclaimed by the allocators themselves.
  void destroy(DelegateHandle* handle) const override {
    NeutronConfig* cfg = reinterpret_cast<NeutronConfig*>(handle);

    // Unprepare to free resources in neutron driver.
    NeutronError neutronRC = neutronModelUnprepare(cfg->nmh);
    (void)neutronRC;

    // Deallocation is done automatically by the runtime allocator, so no
    // explicit delete of cfg or the pointer tables here.
    return;
  }
};

namespace {
// Static registration: constructing `registered` at load time makes the
// "NeutronBackend" delegate discoverable by the ExecuTorch runtime.
auto backend = NeutronBackend();
Backend backend_id{"NeutronBackend", &backend};
static auto registered = register_backend(backend_id);
} // namespace

} // namespace neutron
} // namespace executor
} // namespace torch
252 changes: 252 additions & 0 deletions backends/nxp/runtime/NeutronDriver.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
/*
* Copyright 2022-2024 NXP
*
* SPDX-License-Identifier: BSD-3-Clause
*
* Interface for the NXP Neutron NPU driver.
*/

#ifndef NEUTRON_DRIVER_H
#define NEUTRON_DRIVER_H

#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include "NeutronErrors.h"

/* Neutron Driver error category codes */
typedef enum ERROR_CATEGORY_DRIVER {
  ERROR_CATEGORY_DRIVER_GENERIC, /* Generic error category */
  ERROR_CATEGORY_DRIVER_UNSUPPORTED, /* Unsupported function */
  ERROR_CATEGORY_DRIVER_UCODE, /* Microcode bad magic or incompatible version */
  ERROR_CATEGORY_DRIVER_INVALID, /* Invalid arguments */
  ERROR_CATEGORY_DRIVER_BAD_HANDLE, /* Bad inference handle */
  ERROR_CATEGORY_DRIVER_NO_MEMORY, /* Not enough memory */
  ERROR_CATEGORY_DRIVER_INTERNAL_FAULT, /* Internal error */
  ERROR_CATEGORY_DRIVER_UNKNOWN_ARCH, /* Unknown architecture */
  ERROR_CATEGORY_DRIVER_TRACE_NOT_RUN, /* Tracing did not run, but a trace
                                          buffer was requested. */
  ERROR_CATEGORY_DRIVER_TIMEOUT /* Timeout error. */
} ERROR_CATEGORY_DRIVER;

/// Trace configuration to enable kernel level tracing.
#define TRACE_CONFIG_KERNEL_LEVEL (1U << 0)

/// Trace configuration to enable job level tracing.
#define TRACE_CONFIG_JOB_LEVEL (1U << 1)

// Macro deciding where memory for the Neutron model context is allocated:
// 0 = driver uses the heap, non-zero = caller supplies the memory.
#ifndef NO_HEAP_USAGE
#define NO_HEAP_USAGE 0
#endif

/* Neutron Driver errors: driver-component error codes built on top of the
   generic NeutronError encoding from NeutronErrors.h. */
#define GEN_NEUTRON_DRIVER_ERROR(category, code) \
  GEN_NEUTRON_ERROR(ERROR_COMPONENT_DRIVER, category, code)
#define GEN_NEUTRON_DRIVER_GENERIC_ERROR() \
  GEN_NEUTRON_DRIVER_ERROR(ERROR_CATEGORY_DRIVER_GENERIC, __LINE__)

/// Type definition for a Neutron model handle. This is an identifier used to
/// uniquely identify a model. The convention is that the value
/// NEUTRON_INVALID_HANDLE handle corresponds to an invalid handle.
typedef void* NeutronModelHandle;

typedef struct {
  /// Neutron microcode buffer address.
  /// The Neutron microcode is generated by the Neutron converter tool.
  /// The microcode buffer, 16 bytes aligned, is allocated and initialized by
  /// the application or ML framework. The microcode buffer is passed by
  /// reference to the Neutron firmware. The microcode buffer is specific for a
  /// given ML model.
  const void* microcode;

  /// Neutron weights buffer address.
  /// The Neutron weights is generated by the Neutron converter tool.
  /// The weights buffer, 16 bytes aligned, is allocated and initialized by the
  /// application or ML framework. The weights buffer address is passed by
  /// reference to the Neutron-firmware. The weights buffer is specific for a
  /// given ML model.
  const void* weights;

  /// Neutron kernels buffer address.
  /// The Neutron kernels are generated by the Neutron converter tool.
  /// The kernels buffer, 16 bytes aligned, is allocated and initialized by the
  /// application or ML framework. The kernels buffer address is passed by
  /// reference to the Neutron-firmware. The kernels buffer is specific for a
  /// given ML model.
  const void* kernels;

  /// Timeout for the microcode run, in seconds.
  /// This is the upper limit the user expects a run to take to complete;
  /// default 60.
  uint32_t timeoutSeconds;

} NeutronModelConfig;

typedef struct {
  /// The input buffers of the model.
  /// The input buffers are allocated and initialized by the application or ML
  /// framework. The input buffers are passed by reference to the Neutron
  /// firmware.
  const void** inputs;

  /// The output buffers of the model.
  /// The output buffers are allocated by the application or ML framework.
  /// The output buffers are passed by reference to the Neutron firmware.
  void** outputs;

  /// Scratch buffer required for computing model intermediate results.
  /// If NULL, this buffer has to be allocated by the driver.
  void* scratch;

  /// Scratch buffer required for prefetching model weights from FLASH to SRAM.
  /// This buffer is used only for Neutron-C targets when the weight prefetch
  /// option was explicitly used. If NULL, this buffer has to be allocated by
  /// the driver.
  void* scratchWeights;

} NeutronDataConfig;

typedef struct {
  /// Sets whether tracing should be executed during firmware run or not.
  /// If set to 0, tracing will not run.
  /// If set to 1 - kernel level tracing.
  /// If set to 2 - job level tracing.
  /// If set to 3 - mixed level tracing.
  uint32_t traceConfig;

  /// Buffer to store collected trace data.
  /// If it is NULLPTR, the driver will allocate the memory; otherwise the
  /// application provides it.
  char* traceBuffer;

  /// The allocated size of traceBuffer, used to check that appending a
  /// string will not go out of bounds. The application should set this if it
  /// allocated the buffer; otherwise the driver will set the value.
  size_t traceBufferSize;
} NeutronTraceConfig;

/// This structure contains the prototypes for functions that have a custom
/// implementation. Any new functions or variables must be added at the end.
typedef struct {
  /// This function performs the copying from FLASH to SRAM.
  void (*copy)(void* dst, void* src, uint32_t size, uint32_t channel);
  /// This is a blocking function that checks if the current copy has finished.
  void (*wait)(uint32_t channel);
} NeutronConfig;

/* Invalid handle, returned by neutronModelPrepare() if an error occurred. */
#define NEUTRON_INVALID_HANDLE NULL

/// - Initialize the Neutron Driver library: set initial values, do memory
///   allocation for internal data structures, do memory mapping.
/// - Expected to be called by the surrounding RTOS/application startup code
///   (similar to ethosu_init), not by the ExecuTorch backend itself.
NeutronError neutronInit();

/// - Deinitialize the Neutron Driver library, releasing any resources acquired
///   by neutronInit.
NeutronError neutronDeinit();

/// - Prepare Neutron execution for a model with custom firmware.
/// - This function is only available for Neutron-S.
NeutronError neutronCustomPrepare(
    uint32_t* inputSize,
    int32_t numInputs,
    uint32_t* outputSize,
    int32_t numOutputs,
    const void* firmware,
    size_t firmwareSize,
    NeutronModelHandle* hdl);

/// - Run Neutron custom firmware and get the results.
/// - This function is only available for Neutron-S.
NeutronError neutronCustomExec(
    NeutronModelHandle hdl,
    const NeutronDataConfig* neutron_dcfg);

/// - Prepare Neutron execution for a model with the given configuration.
/// - This function only prepares the execution by transferring the parameters
///   to the firmware.
/// - This function allows caching a model and then running the same model but
///   with different input data (assuming the new input data replaces the old
///   input data by reusing the same buffers).
/// - In case external allocated memory shall be used for the ModelHandle,
///   e.g. from the Tensorflow tensor arena, hdl shall be a pointer to the
///   start of the allocated memory block.
/// - If a pointer to NULL is passed, memory will be allocated by the driver
///   from HEAP. If no HEAP is available, an error will be thrown.
NeutronError neutronModelPrepare(
    const NeutronModelConfig* mcfg,
    NeutronModelHandle* hdl);

/// - Unprepare Neutron execution handle.
/// - This function releases the internal context data structures and the
///   reserved handle.
NeutronError neutronModelUnprepare(NeutronModelHandle hdl);

/// - Perform Neutron execution in blocking mode.
NeutronError neutronRunBlocking(
    NeutronModelHandle hdl,
    const NeutronDataConfig* dcfg);

/// - Perform Neutron execution in non-blocking mode.
/// - This functionality is only available for Neutron-S.
NeutronError neutronRunNonBlocking(
    NeutronModelHandle hdl,
    const NeutronDataConfig* dcfg);

/// - Wait (block) for Neutron completion.
/// - This functionality is only available for Neutron-S.
NeutronError neutronWait(NeutronModelHandle hdl, const NeutronDataConfig* dcfg);

/// - Query if the job is done by Neutron.
/// - This functionality is only available for neutronRunNonBlocking.
NeutronError neutronIsReady(NeutronModelHandle hdl, bool* isReady);

#ifndef NDEBUG
/// - Set tracing information.
void neutronSetTrace(NeutronModelHandle hdl, NeutronTraceConfig* tcfg);

/// - Get tracing result to buffer.
NeutronError
neutronGetTrace(NeutronModelHandle hdl, char** buffer, size_t* size);
#endif

/// - Perform power management to suspend Neutron hardware.
/// - This function disables the clock for Neutron.
NeutronError neutronSuspend();

/// - Perform power management to resume Neutron hardware.
/// - This function enables the clock for Neutron.
NeutronError neutronResume();

/// - Used to initialize custom API's or variables implemented by external
///   application.
NeutronError neutronSetConfig(NeutronConfig* config);

/// - Used to get NeutronContext size.
size_t neutronGetModelContextSize();

/// - Allocates size bytes and returns a pointer to the allocated memory.
///   The returned pointer address will be a multiple of the alignment.
///   Returns NULL on failure.
/// - alignment: Set to 0 if unsure of alignment requirements.
/// - This function is only available for Neutron-S in the Linux environment.
void* neutronMemAlloc(size_t alignment, size_t size);

/// - Frees the memory buffer pointed to by ptr.
/// - This function is only available for Neutron-S in the Linux environment.
void neutronMemFree(void* ptr);
/// Other functions to control the state of driver/firmware.
#ifdef __cplusplus
}
#endif
#endif // NEUTRON_DRIVER_H
48 changes: 48 additions & 0 deletions backends/nxp/runtime/NeutronErrors.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* Copyright 2022-2024 NXP
*
* SPDX-License-Identifier: BSD-3-Clause
*
* Definition of the NXP Neutron NPU driver errors.
*/

#ifndef NEUTRON_ERRORS_H
#define NEUTRON_ERRORS_H

#include <stdint.h>

typedef int32_t NeutronError;

/*
 Generate error code.
 A code is composed of (from least to most significant bit):
   3 bits = component id
   5 bits = category id
   23 bits = code
   1 bit = sign
 The masks below match this layout and the GET_ERROR_* accessors; the
 previous version masked both component and category with 0xF, which
 overlapped the component into the category field and truncated 5-bit
 categories to 4 bits.
*/
#define GEN_NEUTRON_ERROR(component, category, code)                    \
  ((NeutronError)(((component & 0x7) << 0) | ((category & 0x1F) << 3) | \
                  ((code & 0x7FFFFF) << 8)))

/* Success code: no error. */
#define ENONE 0

#define GET_ERROR_COMPONENT(e) ((e >> 0) & 0x00000007)
#define GET_ERROR_CATEGORY(e) ((e >> 3) & 0x0000001F)
#define GET_ERROR_CODE(e) ((e >> 8) & 0x007FFFFF)

/* Component ids */
// DO NOT USE 0x0 as component magic number!
typedef enum ERROR_COMPONENT_ID {
  ERROR_COMPONENT_LIBRARY = 0x1,
  ERROR_COMPONENT_FIRMWARE = 0x2,
  ERROR_COMPONENT_DRIVER = 0x3
} ERROR_COMPONENT_ID;

/// Retrieve component name as string from NeutronError code.
char* getNeutronErrorComponent(NeutronError ne);

/// Retrieve category as string from NeutronError code.
char* getNeutronErrorCategory(NeutronError ne);

#endif // NEUTRON_ERRORS_H
1 change: 0 additions & 1 deletion kernels/portable/cpu/op_expand_copy.cpp
Original file line number Diff line number Diff line change
@@ -10,7 +10,6 @@
#include <executorch/kernels/portable/cpu/util/copy_ops_util.h>
#include <executorch/kernels/portable/cpu/util/repeat_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
#include <sys/types.h>

#include <cstring>